All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup
@ 2021-08-18 20:19 Richard Henderson
  2021-08-18 20:19 ` [PATCH v3 01/16] tcg/mips: Support unaligned access for user-only Richard Henderson
                   ` (16 more replies)
  0 siblings, 17 replies; 27+ messages in thread
From: Richard Henderson @ 2021-08-18 20:19 UTC (permalink / raw)
  To: qemu-devel

Based-on: <20210818191920.390759-1-richard.henderson@linaro.org>
("[PATCH v3 00/66] Unaligned access for user-only")

Important points:
  * Support unaligned accesses.
  * Drop requirement for 256MB alignment of code_gen_buffer.
  * Improvements to tcg_out_movi:
    - Have a tb-relative register for mips64, reducing the
      code size for most pointers,
    - Try a few 3-insn sequences,
    - Drop everything else into a constant pool.


r~


Richard Henderson (16):
  tcg/mips: Support unaligned access for user-only
  tcg/mips: Support unaligned access for softmmu
  tcg/mips: Drop inline markers
  tcg/mips: Move TCG_AREG0 to S8
  tcg/mips: Move TCG_GUEST_BASE_REG to S7
  tcg/mips: Unify TCG_GUEST_BASE_REG tests
  tcg/mips: Allow JAL to be out of range in tcg_out_bswap_subr
  tcg/mips: Unset TCG_TARGET_HAS_direct_jump
  tcg/mips: Drop special alignment for code_gen_buffer
  tcg/mips: Create and use TCG_REG_TB
  tcg/mips: Split out tcg_out_movi_one
  tcg/mips: Split out tcg_out_movi_two
  tcg/mips: Use the constant pool for 64-bit constants
  tcg/mips: Aggressively use the constant pool for n64 calls
  tcg/mips: Try tb-relative addresses in tcg_out_movi
  tcg/mips: Try three insns with shift and add in tcg_out_movi

 tcg/mips/tcg-target.h     |  17 +-
 tcg/region.c              |  91 -----
 tcg/mips/tcg-target.c.inc | 730 +++++++++++++++++++++++++++++++-------
 3 files changed, 604 insertions(+), 234 deletions(-)

-- 
2.25.1



^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH v3 01/16] tcg/mips: Support unaligned access for user-only
  2021-08-18 20:19 [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Richard Henderson
@ 2021-08-18 20:19 ` Richard Henderson
  2021-08-18 20:19 ` [PATCH v3 02/16] tcg/mips: Support unaligned access for softmmu Richard Henderson
                   ` (15 subsequent siblings)
  16 siblings, 0 replies; 27+ messages in thread
From: Richard Henderson @ 2021-08-18 20:19 UTC (permalink / raw)
  To: qemu-devel

This is kinda sorta the opposite of the other tcg hosts, where
we get (normal) alignment checks for free with host SIGBUS and
need to add code to support unaligned accesses.

Fortunately, the ISA contains pairs of instructions that are
used to implement unaligned memory accesses.  Use them.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.h     |   2 -
 tcg/mips/tcg-target.c.inc | 324 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 318 insertions(+), 8 deletions(-)

diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 3a62055f04..3afbb31918 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -209,8 +209,6 @@ extern bool use_mips32r2_instructions;
 
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
 
-#ifdef CONFIG_SOFTMMU
 #define TCG_TARGET_NEED_LDST_LABELS
-#endif
 
 #endif
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 02dc4b63ae..7ed0de9dae 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -24,6 +24,8 @@
  * THE SOFTWARE.
  */
 
+#include "../tcg-ldst.c.inc"
+
 #ifdef HOST_WORDS_BIGENDIAN
 # define MIPS_BE  1
 #else
@@ -230,16 +232,26 @@ typedef enum {
     OPC_ORI      = 015 << 26,
     OPC_XORI     = 016 << 26,
     OPC_LUI      = 017 << 26,
+    OPC_BNEL     = 025 << 26,
+    OPC_BNEZALC_R6 = 030 << 26,
     OPC_DADDIU   = 031 << 26,
+    OPC_LDL      = 032 << 26,
+    OPC_LDR      = 033 << 26,
     OPC_LB       = 040 << 26,
     OPC_LH       = 041 << 26,
+    OPC_LWL      = 042 << 26,
     OPC_LW       = 043 << 26,
     OPC_LBU      = 044 << 26,
     OPC_LHU      = 045 << 26,
+    OPC_LWR      = 046 << 26,
     OPC_LWU      = 047 << 26,
     OPC_SB       = 050 << 26,
     OPC_SH       = 051 << 26,
+    OPC_SWL      = 052 << 26,
     OPC_SW       = 053 << 26,
+    OPC_SDL      = 054 << 26,
+    OPC_SDR      = 055 << 26,
+    OPC_SWR      = 056 << 26,
     OPC_LD       = 067 << 26,
     OPC_SD       = 077 << 26,
 
@@ -1035,8 +1047,6 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg)
 }
 
 #if defined(CONFIG_SOFTMMU)
-#include "../tcg-ldst.c.inc"
-
 static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
     [MO_UB]   = helper_ret_ldub_mmu,
     [MO_SB]   = helper_ret_ldsb_mmu,
@@ -1344,7 +1354,82 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
     tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
     return true;
 }
-#endif
+
+#else
+
+static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
+                                   TCGReg addrhi, unsigned a_bits)
+{
+    unsigned a_mask = (1 << a_bits) - 1;
+    TCGLabelQemuLdst *l = new_ldst_label(s);
+
+    l->is_ld = is_ld;
+    l->addrlo_reg = addrlo;
+    l->addrhi_reg = addrhi;
+
+    /* We are expecting a_bits to max out at 7, much lower than ANDI. */
+    tcg_debug_assert(a_bits < 16);
+    tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask);
+
+    l->label_ptr[0] = s->code_ptr;
+    if (use_mips32r6_instructions) {
+        tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0);
+    } else {
+        tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO);
+        tcg_out_nop(s);
+    }
+
+    l->raddr = tcg_splitwx_to_rx(s->code_ptr);
+}
+
+static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
+{
+    void *target;
+
+    if (!reloc_pc16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
+        return false;
+    }
+
+    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+        /* A0 is env, A1 is skipped, A2:A3 is the uint64_t address. */
+        TCGReg a2 = MIPS_BE ? l->addrhi_reg : l->addrlo_reg;
+        TCGReg a3 = MIPS_BE ? l->addrlo_reg : l->addrhi_reg;
+
+        if (a3 != TCG_REG_A2) {
+            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, a2);
+            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, a3);
+        } else if (a2 != TCG_REG_A3) {
+            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, a3);
+            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, a2);
+        } else {
+            tcg_out_mov(s, TCG_TYPE_I32, TCG_TMP0, TCG_REG_A2);
+            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, TCG_REG_A3);
+            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, TCG_TMP0);
+        }
+    } else {
+        tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg);
+    }
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
+
+    /*
+     * Tail call to the helper, with the return address back inline.
+     * We have arrived here via BNEL, so $31 is already set.
+     */
+    target = (l->is_ld ? helper_unaligned_ld : helper_unaligned_st);
+    tcg_out_call_int(s, target, true);
+    return true;
+}
+
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+    return tcg_out_fail_alignment(s, l);
+}
+
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+    return tcg_out_fail_alignment(s, l);
+}
+#endif /* SOFTMMU */
 
 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
                                    TCGReg base, MemOp opc, bool is_64)
@@ -1450,6 +1535,117 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
     }
 }
 
+static void __attribute__((unused))
+tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
+                                    TCGReg base, MemOp opc, bool is_64)
+{
+    const MIPSInsn lw1 = MIPS_BE ? OPC_LWL : OPC_LWR;
+    const MIPSInsn lw2 = MIPS_BE ? OPC_LWR : OPC_LWL;
+    const MIPSInsn ld1 = MIPS_BE ? OPC_LDL : OPC_LDR;
+    const MIPSInsn ld2 = MIPS_BE ? OPC_LDR : OPC_LDL;
+
+    bool sgn = (opc & MO_SIGN);
+
+    switch (opc & (MO_SSIZE | MO_BSWAP)) {
+    case MO_SW | MO_BE:
+    case MO_UW | MO_BE:
+        tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 0);
+        tcg_out_opc_imm(s, OPC_LBU, TCG_TMP1, base, 1);
+        tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8);
+        tcg_out_opc_reg(s, OPC_OR, lo, TCG_TMP0, TCG_TMP1);
+        break;
+
+    case MO_SW | MO_LE:
+    case MO_UW | MO_LE:
+        tcg_out_opc_imm(s, OPC_LBU, TCG_TMP0, base, 0);
+        tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP1, base, 1);
+        tcg_out_opc_sa(s, OPC_SLL, TCG_TMP1, TCG_TMP1, 8);
+        tcg_out_opc_reg(s, OPC_OR, lo, TCG_TMP0, TCG_TMP1);
+        break;
+
+    case MO_SL:
+    case MO_UL:
+        tcg_out_opc_imm(s, lw1, lo, base, 0);
+        tcg_out_opc_imm(s, lw2, lo, base, 3);
+        if (TCG_TARGET_REG_BITS == 64 && is_64 && !sgn) {
+            tcg_out_ext32u(s, lo, lo);
+        }
+        break;
+
+    case MO_UL | MO_BSWAP:
+    case MO_SL | MO_BSWAP:
+        if (use_mips32r2_instructions) {
+            tcg_out_opc_imm(s, lw1, lo, base, 0);
+            tcg_out_opc_imm(s, lw2, lo, base, 3);
+            tcg_out_bswap32(s, lo, lo,
+                            TCG_TARGET_REG_BITS == 64 && is_64
+                            ? (sgn ? TCG_BSWAP_OS : TCG_BSWAP_OZ) : 0);
+        } else {
+            const tcg_insn_unit *subr =
+                (TCG_TARGET_REG_BITS == 64 && is_64 && !sgn
+                 ? bswap32u_addr : bswap32_addr);
+
+            tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0);
+            tcg_out_bswap_subr(s, subr);
+            /* delay slot */
+            tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 3);
+            tcg_out_mov(s, is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32, lo, TCG_TMP3);
+        }
+        break;
+
+    case MO_Q:
+        if (TCG_TARGET_REG_BITS == 64) {
+            tcg_out_opc_imm(s, ld1, lo, base, 0);
+            tcg_out_opc_imm(s, ld2, lo, base, 7);
+        } else {
+            tcg_out_opc_imm(s, lw1, MIPS_BE ? hi : lo, base, 0 + 0);
+            tcg_out_opc_imm(s, lw2, MIPS_BE ? hi : lo, base, 0 + 3);
+            tcg_out_opc_imm(s, lw1, MIPS_BE ? lo : hi, base, 4 + 0);
+            tcg_out_opc_imm(s, lw2, MIPS_BE ? lo : hi, base, 4 + 3);
+        }
+        break;
+
+    case MO_Q | MO_BSWAP:
+        if (TCG_TARGET_REG_BITS == 64) {
+            if (use_mips32r2_instructions) {
+                tcg_out_opc_imm(s, ld1, lo, base, 0);
+                tcg_out_opc_imm(s, ld2, lo, base, 7);
+                tcg_out_bswap64(s, lo, lo);
+            } else {
+                tcg_out_opc_imm(s, ld1, TCG_TMP0, base, 0);
+                tcg_out_bswap_subr(s, bswap64_addr);
+                /* delay slot */
+                tcg_out_opc_imm(s, ld2, TCG_TMP0, base, 7);
+                tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
+            }
+        } else if (use_mips32r2_instructions) {
+            tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0 + 0);
+            tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 0 + 3);
+            tcg_out_opc_imm(s, lw1, TCG_TMP1, base, 4 + 0);
+            tcg_out_opc_imm(s, lw2, TCG_TMP1, base, 4 + 3);
+            tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0);
+            tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1);
+            tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16);
+            tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16);
+        } else {
+            tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0 + 0);
+            tcg_out_bswap_subr(s, bswap32_addr);
+            /* delay slot */
+            tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 0 + 3);
+            tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 4 + 0);
+            tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? lo : hi, TCG_TMP3);
+            tcg_out_bswap_subr(s, bswap32_addr);
+            /* delay slot */
+            tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 4 + 3);
+            tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? hi : lo, TCG_TMP3);
+        }
+        break;
+
+    default:
+        tcg_abort();
+    }
+}
+
 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
 {
     TCGReg addr_regl, addr_regh __attribute__((unused));
@@ -1458,6 +1654,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
     MemOp opc;
 #if defined(CONFIG_SOFTMMU)
     tcg_insn_unit *label_ptr[2];
+#else
+    unsigned a_bits, s_bits;
 #endif
     TCGReg base = TCG_REG_A0;
 
@@ -1487,7 +1685,27 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
     } else {
         tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_GUEST_BASE_REG, addr_regl);
     }
-    tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
+    a_bits = get_alignment_bits(opc);
+    s_bits = opc & MO_SIZE;
+    /*
+     * R6 removes the left/right instructions but requires the
+     * system to support misaligned memory accesses.
+     */
+    if (use_mips32r6_instructions) {
+        if (a_bits) {
+            tcg_out_test_alignment(s, true, addr_regl, addr_regh, a_bits);
+        }
+        tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
+    } else {
+        if (a_bits && a_bits != s_bits) {
+            tcg_out_test_alignment(s, true, addr_regl, addr_regh, a_bits);
+        }
+        if (a_bits >= s_bits) {
+            tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
+        } else {
+            tcg_out_qemu_ld_unalign(s, data_regl, data_regh, base, opc, is_64);
+        }
+    }
 #endif
 }
 
@@ -1552,6 +1770,79 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
     }
 }
 
+static void __attribute__((unused))
+tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
+                                    TCGReg base, MemOp opc)
+{
+    const MIPSInsn sw1 = MIPS_BE ? OPC_SWL : OPC_SWR;
+    const MIPSInsn sw2 = MIPS_BE ? OPC_SWR : OPC_SWL;
+    const MIPSInsn sd1 = MIPS_BE ? OPC_SDL : OPC_SDR;
+    const MIPSInsn sd2 = MIPS_BE ? OPC_SDR : OPC_SDL;
+
+    /* Don't clutter the code below with checks to avoid bswapping ZERO.  */
+    if ((lo | hi) == 0) {
+        opc &= ~MO_BSWAP;
+    }
+
+    switch (opc & (MO_SIZE | MO_BSWAP)) {
+    case MO_16 | MO_BE:
+        tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, lo, 8);
+        tcg_out_opc_imm(s, OPC_SB, TCG_TMP0, base, 0);
+        tcg_out_opc_imm(s, OPC_SB, lo, base, 1);
+        break;
+
+    case MO_16 | MO_LE:
+        tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, lo, 8);
+        tcg_out_opc_imm(s, OPC_SB, lo, base, 0);
+        tcg_out_opc_imm(s, OPC_SB, TCG_TMP0, base, 1);
+        break;
+
+    case MO_32 | MO_BSWAP:
+        tcg_out_bswap32(s, TCG_TMP3, lo, 0);
+        lo = TCG_TMP3;
+        /* fall through */
+    case MO_32:
+        tcg_out_opc_imm(s, sw1, lo, base, 0);
+        tcg_out_opc_imm(s, sw2, lo, base, 3);
+        break;
+
+    case MO_64 | MO_BSWAP:
+        if (TCG_TARGET_REG_BITS == 64) {
+            tcg_out_bswap64(s, TCG_TMP3, lo);
+            lo = TCG_TMP3;
+        } else if (use_mips32r2_instructions) {
+            tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? hi : lo);
+            tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? lo : hi);
+            tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16);
+            tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16);
+            hi = MIPS_BE ? TCG_TMP0 : TCG_TMP1;
+            lo = MIPS_BE ? TCG_TMP1 : TCG_TMP0;
+        } else {
+            tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi, 0);
+            tcg_out_opc_imm(s, sw1, TCG_TMP3, base, 0);
+            tcg_out_opc_imm(s, sw2, TCG_TMP3, base, 3);
+            tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo, 0);
+            tcg_out_opc_imm(s, sw1, TCG_TMP3, base, 4);
+            tcg_out_opc_imm(s, sw2, TCG_TMP3, base, 7);
+            break;
+        }
+        /* fall through */
+    case MO_64:
+        if (TCG_TARGET_REG_BITS == 64) {
+            tcg_out_opc_imm(s, sd1, lo, base, 0);
+            tcg_out_opc_imm(s, sd2, lo, base, 7);
+        } else {
+            tcg_out_opc_imm(s, sw1, MIPS_BE ? hi : lo, base, 0);
+            tcg_out_opc_imm(s, sw2, MIPS_BE ? hi : lo, base, 3);
+            tcg_out_opc_imm(s, sw1, MIPS_BE ? lo : hi, base, 4);
+            tcg_out_opc_imm(s, sw2, MIPS_BE ? lo : hi, base, 7);
+        }
+        break;
+
+    default:
+        tcg_abort();
+    }
+}
 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
 {
     TCGReg addr_regl, addr_regh __attribute__((unused));
@@ -1560,6 +1851,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
     MemOp opc;
 #if defined(CONFIG_SOFTMMU)
     tcg_insn_unit *label_ptr[2];
+#else
+    unsigned a_bits, s_bits;
 #endif
     TCGReg base = TCG_REG_A0;
 
@@ -1578,7 +1871,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
                         data_regl, data_regh, addr_regl, addr_regh,
                         s->code_ptr, label_ptr);
 #else
-    base = TCG_REG_A0;
     if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
         tcg_out_ext32u(s, base, addr_regl);
         addr_regl = base;
@@ -1590,7 +1882,27 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
     } else {
         tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_GUEST_BASE_REG, addr_regl);
     }
-    tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
+    a_bits = get_alignment_bits(opc);
+    s_bits = opc & MO_SIZE;
+    /*
+     * R6 removes the left/right instructions but requires the
+     * system to support misaligned memory accesses.
+     */
+    if (use_mips32r6_instructions) {
+        if (a_bits) {
+            tcg_out_test_alignment(s, true, addr_regl, addr_regh, a_bits);
+        }
+        tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
+    } else {
+        if (a_bits && a_bits != s_bits) {
+            tcg_out_test_alignment(s, true, addr_regl, addr_regh, a_bits);
+        }
+        if (a_bits >= s_bits) {
+            tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
+        } else {
+            tcg_out_qemu_st_unalign(s, data_regl, data_regh, base, opc);
+        }
+    }
 #endif
 }
 
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 02/16] tcg/mips: Support unaligned access for softmmu
  2021-08-18 20:19 [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Richard Henderson
  2021-08-18 20:19 ` [PATCH v3 01/16] tcg/mips: Support unaligned access for user-only Richard Henderson
@ 2021-08-18 20:19 ` Richard Henderson
  2021-08-21  6:39   ` Jiaxun Yang
  2021-08-18 20:19 ` [PATCH v3 03/16] tcg/mips: Drop inline markers Richard Henderson
                   ` (14 subsequent siblings)
  16 siblings, 1 reply; 27+ messages in thread
From: Richard Henderson @ 2021-08-18 20:19 UTC (permalink / raw)
  To: qemu-devel

We can use the routines just added for user-only to emit
unaligned accesses in softmmu mode too.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.c.inc | 91 ++++++++++++++++++++++-----------------
 1 file changed, 51 insertions(+), 40 deletions(-)

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 7ed0de9dae..3d6a0ba39e 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -1154,8 +1154,10 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
                              tcg_insn_unit *label_ptr[2], bool is_load)
 {
     MemOp opc = get_memop(oi);
-    unsigned s_bits = opc & MO_SIZE;
     unsigned a_bits = get_alignment_bits(opc);
+    unsigned s_bits = opc & MO_SIZE;
+    unsigned a_mask = (1 << a_bits) - 1;
+    unsigned s_mask = (1 << s_bits) - 1;
     int mem_index = get_mmuidx(oi);
     int fast_off = TLB_MASK_TABLE_OFS(mem_index);
     int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
@@ -1163,7 +1165,7 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
     int add_off = offsetof(CPUTLBEntry, addend);
     int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
                    : offsetof(CPUTLBEntry, addr_write));
-    target_ulong mask;
+    target_ulong tlb_mask;
 
     /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
     tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
@@ -1177,27 +1179,13 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
     /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3.  */
     tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
 
-    /* We don't currently support unaligned accesses.
-       We could do so with mips32r6.  */
-    if (a_bits < s_bits) {
-        a_bits = s_bits;
-    }
-
-    /* Mask the page bits, keeping the alignment bits to compare against.  */
-    mask = (target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
-
     /* Load the (low-half) tlb comparator.  */
     if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
-        tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
-        tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, mask);
+        tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
     } else {
         tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD
                          : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW),
                      TCG_TMP0, TCG_TMP3, cmp_off);
-        tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, mask);
-        /* No second compare is required here;
-           load the tlb addend for the fast path.  */
-        tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
     }
 
     /* Zero extend a 32-bit guest address for a 64-bit host. */
@@ -1205,7 +1193,25 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
         tcg_out_ext32u(s, base, addrl);
         addrl = base;
     }
-    tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
+
+    /*
+     * Mask the page bits, keeping the alignment bits to compare against.
+     * For unaligned accesses, compare against the end of the access to
+     * verify that it does not cross a page boundary.
+     */
+    tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask);
+    if (a_mask >= s_mask) {
+        tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
+    } else {
+        tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrl, s_mask - a_mask);
+        tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
+    }
+
+    if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
+        /* Load the tlb addend for the fast path.  */
+        tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
+    }
 
     label_ptr[0] = s->code_ptr;
     tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
@@ -1213,7 +1219,7 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
     /* Load and test the high half tlb comparator.  */
     if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
         /* delay slot */
-        tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
+        tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
 
         /* Load the tlb addend for the fast path.  */
         tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
@@ -1535,8 +1541,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
     }
 }
 
-static void __attribute__((unused))
-tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
+static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
                                     TCGReg base, MemOp opc, bool is_64)
 {
     const MIPSInsn lw1 = MIPS_BE ? OPC_LWL : OPC_LWR;
@@ -1655,8 +1660,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
 #if defined(CONFIG_SOFTMMU)
     tcg_insn_unit *label_ptr[2];
 #else
-    unsigned a_bits, s_bits;
 #endif
+    unsigned a_bits, s_bits;
     TCGReg base = TCG_REG_A0;
 
     data_regl = *args++;
@@ -1665,10 +1670,20 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
     addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
     oi = *args++;
     opc = get_memop(oi);
+    a_bits = get_alignment_bits(opc);
+    s_bits = opc & MO_SIZE;
 
+    /*
+     * R6 removes the left/right instructions but requires the
+     * system to support misaligned memory accesses.
+     */
 #if defined(CONFIG_SOFTMMU)
     tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 1);
-    tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
+    if (use_mips32r6_instructions || a_bits >= s_bits) {
+        tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
+    } else {
+        tcg_out_qemu_ld_unalign(s, data_regl, data_regh, base, opc, is_64);
+    }
     add_qemu_ldst_label(s, 1, oi,
                         (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                         data_regl, data_regh, addr_regl, addr_regh,
@@ -1685,12 +1700,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
     } else {
         tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_GUEST_BASE_REG, addr_regl);
     }
-    a_bits = get_alignment_bits(opc);
-    s_bits = opc & MO_SIZE;
-    /*
-     * R6 removes the left/right instructions but requires the
-     * system to support misaligned memory accesses.
-     */
     if (use_mips32r6_instructions) {
         if (a_bits) {
             tcg_out_test_alignment(s, true, addr_regl, addr_regh, a_bits);
@@ -1770,8 +1779,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
     }
 }
 
-static void __attribute__((unused))
-tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
+static void tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
                                     TCGReg base, MemOp opc)
 {
     const MIPSInsn sw1 = MIPS_BE ? OPC_SWL : OPC_SWR;
@@ -1851,9 +1859,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
     MemOp opc;
 #if defined(CONFIG_SOFTMMU)
     tcg_insn_unit *label_ptr[2];
-#else
-    unsigned a_bits, s_bits;
 #endif
+    unsigned a_bits, s_bits;
     TCGReg base = TCG_REG_A0;
 
     data_regl = *args++;
@@ -1862,10 +1869,20 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
     addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
     oi = *args++;
     opc = get_memop(oi);
+    a_bits = get_alignment_bits(opc);
+    s_bits = opc & MO_SIZE;
 
+    /*
+     * R6 removes the left/right instructions but requires the
+     * system to support misaligned memory accesses.
+     */
 #if defined(CONFIG_SOFTMMU)
     tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 0);
-    tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
+    if (use_mips32r6_instructions || a_bits >= s_bits) {
+        tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
+    } else {
+        tcg_out_qemu_st_unalign(s, data_regl, data_regh, base, opc);
+    }
     add_qemu_ldst_label(s, 0, oi,
                         (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                         data_regl, data_regh, addr_regl, addr_regh,
@@ -1882,12 +1899,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
     } else {
         tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_GUEST_BASE_REG, addr_regl);
     }
-    a_bits = get_alignment_bits(opc);
-    s_bits = opc & MO_SIZE;
-    /*
-     * R6 removes the left/right instructions but requires the
-     * system to support misaligned memory accesses.
-     */
     if (use_mips32r6_instructions) {
         if (a_bits) {
             tcg_out_test_alignment(s, true, addr_regl, addr_regh, a_bits);
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 03/16] tcg/mips: Drop inline markers
  2021-08-18 20:19 [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Richard Henderson
  2021-08-18 20:19 ` [PATCH v3 01/16] tcg/mips: Support unaligned access for user-only Richard Henderson
  2021-08-18 20:19 ` [PATCH v3 02/16] tcg/mips: Support unaligned access for softmmu Richard Henderson
@ 2021-08-18 20:19 ` Richard Henderson
  2021-08-18 21:03   ` Philippe Mathieu-Daudé
  2021-08-18 20:19 ` [PATCH v3 04/16] tcg/mips: Move TCG_AREG0 to S8 Richard Henderson
                   ` (13 subsequent siblings)
  16 siblings, 1 reply; 27+ messages in thread
From: Richard Henderson @ 2021-08-18 20:19 UTC (permalink / raw)
  To: qemu-devel

Let the compiler decide about inlining.
Remove tcg_out_ext8s and tcg_out_ext16s as unused.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.c.inc | 76 ++++++++++++++-------------------------
 1 file changed, 27 insertions(+), 49 deletions(-)

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 3d6a0ba39e..94f1bebdba 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -189,7 +189,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 #endif
 
 
-static inline bool is_p2m1(tcg_target_long val)
+static bool is_p2m1(tcg_target_long val)
 {
     return val && ((val + 1) & val) == 0;
 }
@@ -373,8 +373,8 @@ typedef enum {
 /*
  * Type reg
  */
-static inline void tcg_out_opc_reg(TCGContext *s, MIPSInsn opc,
-                                   TCGReg rd, TCGReg rs, TCGReg rt)
+static void tcg_out_opc_reg(TCGContext *s, MIPSInsn opc,
+                            TCGReg rd, TCGReg rs, TCGReg rt)
 {
     int32_t inst;
 
@@ -388,8 +388,8 @@ static inline void tcg_out_opc_reg(TCGContext *s, MIPSInsn opc,
 /*
  * Type immediate
  */
-static inline void tcg_out_opc_imm(TCGContext *s, MIPSInsn opc,
-                                   TCGReg rt, TCGReg rs, TCGArg imm)
+static void tcg_out_opc_imm(TCGContext *s, MIPSInsn opc,
+                            TCGReg rt, TCGReg rs, TCGArg imm)
 {
     int32_t inst;
 
@@ -403,8 +403,8 @@ static inline void tcg_out_opc_imm(TCGContext *s, MIPSInsn opc,
 /*
  * Type bitfield
  */
-static inline void tcg_out_opc_bf(TCGContext *s, MIPSInsn opc, TCGReg rt,
-                                  TCGReg rs, int msb, int lsb)
+static void tcg_out_opc_bf(TCGContext *s, MIPSInsn opc, TCGReg rt,
+                           TCGReg rs, int msb, int lsb)
 {
     int32_t inst;
 
@@ -416,8 +416,8 @@ static inline void tcg_out_opc_bf(TCGContext *s, MIPSInsn opc, TCGReg rt,
     tcg_out32(s, inst);
 }
 
-static inline void tcg_out_opc_bf64(TCGContext *s, MIPSInsn opc, MIPSInsn opm,
-                                    MIPSInsn oph, TCGReg rt, TCGReg rs,
+static void tcg_out_opc_bf64(TCGContext *s, MIPSInsn opc, MIPSInsn opm,
+                             MIPSInsn oph, TCGReg rt, TCGReg rs,
                                     int msb, int lsb)
 {
     if (lsb >= 32) {
@@ -434,8 +434,7 @@ static inline void tcg_out_opc_bf64(TCGContext *s, MIPSInsn opc, MIPSInsn opm,
 /*
  * Type branch
  */
-static inline void tcg_out_opc_br(TCGContext *s, MIPSInsn opc,
-                                  TCGReg rt, TCGReg rs)
+static void tcg_out_opc_br(TCGContext *s, MIPSInsn opc, TCGReg rt, TCGReg rs)
 {
     tcg_out_opc_imm(s, opc, rt, rs, 0);
 }
@@ -443,8 +442,8 @@ static inline void tcg_out_opc_br(TCGContext *s, MIPSInsn opc,
 /*
  * Type sa
  */
-static inline void tcg_out_opc_sa(TCGContext *s, MIPSInsn opc,
-                                  TCGReg rd, TCGReg rt, TCGArg sa)
+static void tcg_out_opc_sa(TCGContext *s, MIPSInsn opc,
+                           TCGReg rd, TCGReg rt, TCGArg sa)
 {
     int32_t inst;
 
@@ -491,28 +490,27 @@ static bool tcg_out_opc_jmp(TCGContext *s, MIPSInsn opc, const void *target)
     return true;
 }
 
-static inline void tcg_out_nop(TCGContext *s)
+static void tcg_out_nop(TCGContext *s)
 {
     tcg_out32(s, 0);
 }
 
-static inline void tcg_out_dsll(TCGContext *s, TCGReg rd, TCGReg rt, TCGArg sa)
+static void tcg_out_dsll(TCGContext *s, TCGReg rd, TCGReg rt, TCGArg sa)
 {
     tcg_out_opc_sa64(s, OPC_DSLL, OPC_DSLL32, rd, rt, sa);
 }
 
-static inline void tcg_out_dsrl(TCGContext *s, TCGReg rd, TCGReg rt, TCGArg sa)
+static void tcg_out_dsrl(TCGContext *s, TCGReg rd, TCGReg rt, TCGArg sa)
 {
     tcg_out_opc_sa64(s, OPC_DSRL, OPC_DSRL32, rd, rt, sa);
 }
 
-static inline void tcg_out_dsra(TCGContext *s, TCGReg rd, TCGReg rt, TCGArg sa)
+static void tcg_out_dsra(TCGContext *s, TCGReg rd, TCGReg rt, TCGArg sa)
 {
     tcg_out_opc_sa64(s, OPC_DSRA, OPC_DSRA32, rd, rt, sa);
 }
 
-static inline bool tcg_out_mov(TCGContext *s, TCGType type,
-                               TCGReg ret, TCGReg arg)
+static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
 {
     /* Simple reg-reg move, optimising out the 'do nothing' case */
     if (ret != arg) {
@@ -624,27 +622,7 @@ static void tcg_out_bswap64(TCGContext *s, TCGReg ret, TCGReg arg)
     }
 }
 
-static inline void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg)
-{
-    if (use_mips32r2_instructions) {
-        tcg_out_opc_reg(s, OPC_SEB, ret, 0, arg);
-    } else {
-        tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24);
-        tcg_out_opc_sa(s, OPC_SRA, ret, ret, 24);
-    }
-}
-
-static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg)
-{
-    if (use_mips32r2_instructions) {
-        tcg_out_opc_reg(s, OPC_SEH, ret, 0, arg);
-    } else {
-        tcg_out_opc_sa(s, OPC_SLL, ret, arg, 16);
-        tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16);
-    }
-}
-
-static inline void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg)
+static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg)
 {
     if (use_mips32r2_instructions) {
         tcg_out_opc_bf(s, OPC_DEXT, ret, arg, 31, 0);
@@ -668,8 +646,8 @@ static void tcg_out_ldst(TCGContext *s, MIPSInsn opc, TCGReg data,
     tcg_out_opc_imm(s, opc, data, addr, lo);
 }
 
-static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
-                              TCGReg arg1, intptr_t arg2)
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
+                       TCGReg arg1, intptr_t arg2)
 {
     MIPSInsn opc = OPC_LD;
     if (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32) {
@@ -678,8 +656,8 @@ static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
     tcg_out_ldst(s, opc, arg, arg1, arg2);
 }
 
-static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
-                              TCGReg arg1, intptr_t arg2)
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+                       TCGReg arg1, intptr_t arg2)
 {
     MIPSInsn opc = OPC_SD;
     if (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32) {
@@ -688,8 +666,8 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
     tcg_out_ldst(s, opc, arg, arg1, arg2);
 }
 
-static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
-                               TCGReg base, intptr_t ofs)
+static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
+                        TCGReg base, intptr_t ofs)
 {
     if (val == 0) {
         tcg_out_st(s, type, TCG_REG_ZERO, base, ofs);
@@ -1960,9 +1938,9 @@ static void tcg_out_clz(TCGContext *s, MIPSInsn opcv2, MIPSInsn opcv6,
     }
 }
 
-static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
-                              const TCGArg args[TCG_MAX_OP_ARGS],
-                              const int const_args[TCG_MAX_OP_ARGS])
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+                       const TCGArg args[TCG_MAX_OP_ARGS],
+                       const int const_args[TCG_MAX_OP_ARGS])
 {
     MIPSInsn i1, i2;
     TCGArg a0, a1, a2;
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 04/16] tcg/mips: Move TCG_AREG0 to S8
  2021-08-18 20:19 [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Richard Henderson
                   ` (2 preceding siblings ...)
  2021-08-18 20:19 ` [PATCH v3 03/16] tcg/mips: Drop inline markers Richard Henderson
@ 2021-08-18 20:19 ` Richard Henderson
  2021-08-18 22:10   ` Philippe Mathieu-Daudé
  2021-08-18 20:19 ` [PATCH v3 05/16] tcg/mips: Move TCG_GUEST_BASE_REG to S7 Richard Henderson
                   ` (12 subsequent siblings)
  16 siblings, 1 reply; 27+ messages in thread
From: Richard Henderson @ 2021-08-18 20:19 UTC (permalink / raw)
  To: qemu-devel

No functional change; just moving the saved reserved regs to the end.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.h     | 2 +-
 tcg/mips/tcg-target.c.inc | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 3afbb31918..c34cccebd3 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -80,7 +80,7 @@ typedef enum {
     TCG_REG_RA,
 
     TCG_REG_CALL_STACK = TCG_REG_SP,
-    TCG_AREG0 = TCG_REG_S0,
+    TCG_AREG0 = TCG_REG_S8,
 } TCGReg;
 
 /* used for function call generation */
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 94f1bebdba..92bde50704 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -2542,7 +2542,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 }
 
 static const int tcg_target_callee_save_regs[] = {
-    TCG_REG_S0,       /* used for the global env (TCG_AREG0) */
+    TCG_REG_S0,
     TCG_REG_S1,
     TCG_REG_S2,
     TCG_REG_S3,
@@ -2550,7 +2550,7 @@ static const int tcg_target_callee_save_regs[] = {
     TCG_REG_S5,
     TCG_REG_S6,
     TCG_REG_S7,
-    TCG_REG_S8,
+    TCG_REG_S8,       /* used for the global env (TCG_AREG0) */
     TCG_REG_RA,       /* should be last for ABI compliance */
 };
 
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 05/16] tcg/mips: Move TCG_GUEST_BASE_REG to S7
  2021-08-18 20:19 [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Richard Henderson
                   ` (3 preceding siblings ...)
  2021-08-18 20:19 ` [PATCH v3 04/16] tcg/mips: Move TCG_AREG0 to S8 Richard Henderson
@ 2021-08-18 20:19 ` Richard Henderson
  2021-08-18 22:11   ` Philippe Mathieu-Daudé
  2021-08-18 20:19 ` [PATCH v3 06/16] tcg/mips: Unify TCG_GUEST_BASE_REG tests Richard Henderson
                   ` (11 subsequent siblings)
  16 siblings, 1 reply; 27+ messages in thread
From: Richard Henderson @ 2021-08-18 20:19 UTC (permalink / raw)
  To: qemu-devel

No functional change; just moving the saved reserved regs to the end.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.c.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 92bde50704..b3a2cc88ab 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -86,7 +86,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 #define TCG_TMP3  TCG_REG_T7
 
 #ifndef CONFIG_SOFTMMU
-#define TCG_GUEST_BASE_REG TCG_REG_S1
+#define TCG_GUEST_BASE_REG TCG_REG_S7
 #endif
 
 /* check if we really need so many registers :P */
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 06/16] tcg/mips: Unify TCG_GUEST_BASE_REG tests
  2021-08-18 20:19 [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Richard Henderson
                   ` (4 preceding siblings ...)
  2021-08-18 20:19 ` [PATCH v3 05/16] tcg/mips: Move TCG_GUEST_BASE_REG to S7 Richard Henderson
@ 2021-08-18 20:19 ` Richard Henderson
  2021-08-18 20:19 ` [PATCH v3 07/16] tcg/mips: Allow JAL to be out of range in tcg_out_bswap_subr Richard Henderson
                   ` (10 subsequent siblings)
  16 siblings, 0 replies; 27+ messages in thread
From: Richard Henderson @ 2021-08-18 20:19 UTC (permalink / raw)
  To: qemu-devel

In tcg_out_qemu_ld/st, we already check for guest_base matching int16_t.
Mirror that when setting up TCG_GUEST_BASE_REG in the prologue.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.c.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index b3a2cc88ab..4019c22f3c 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -2671,7 +2671,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     }
 
 #ifndef CONFIG_SOFTMMU
-    if (guest_base) {
+    if (guest_base != (int16_t)guest_base) {
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
         tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
     }
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 07/16] tcg/mips: Allow JAL to be out of range in tcg_out_bswap_subr
  2021-08-18 20:19 [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Richard Henderson
                   ` (5 preceding siblings ...)
  2021-08-18 20:19 ` [PATCH v3 06/16] tcg/mips: Unify TCG_GUEST_BASE_REG tests Richard Henderson
@ 2021-08-18 20:19 ` Richard Henderson
  2021-09-20 21:14   ` Philippe Mathieu-Daudé
  2021-08-18 20:19 ` [PATCH v3 08/16] tcg/mips: Unset TCG_TARGET_HAS_direct_jump Richard Henderson
                   ` (9 subsequent siblings)
  16 siblings, 1 reply; 27+ messages in thread
From: Richard Henderson @ 2021-08-18 20:19 UTC (permalink / raw)
  To: qemu-devel

Weaning off of unique alignment requirements, so allow JAL
to not reach the target.  TCG_TMP1 is always available for
use as a scratch because it is clobbered by the subroutine
being called.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.c.inc | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 4019c22f3c..c65c4ee1f8 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -585,8 +585,10 @@ static void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg, int flags)
 
 static void tcg_out_bswap_subr(TCGContext *s, const tcg_insn_unit *sub)
 {
-    bool ok = tcg_out_opc_jmp(s, OPC_JAL, sub);
-    tcg_debug_assert(ok);
+    if (!tcg_out_opc_jmp(s, OPC_JAL, sub)) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP1, (uintptr_t)sub);
+        tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_TMP1, 0);
+    }
 }
 
 static void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg, int flags)
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 08/16] tcg/mips: Unset TCG_TARGET_HAS_direct_jump
  2021-08-18 20:19 [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Richard Henderson
                   ` (6 preceding siblings ...)
  2021-08-18 20:19 ` [PATCH v3 07/16] tcg/mips: Allow JAL to be out of range in tcg_out_bswap_subr Richard Henderson
@ 2021-08-18 20:19 ` Richard Henderson
  2021-08-18 22:17   ` Philippe Mathieu-Daudé
  2021-08-18 20:19 ` [PATCH v3 09/16] tcg/mips: Drop special alignment for code_gen_buffer Richard Henderson
                   ` (8 subsequent siblings)
  16 siblings, 1 reply; 27+ messages in thread
From: Richard Henderson @ 2021-08-18 20:19 UTC (permalink / raw)
  To: qemu-devel

Only use indirect jumps.  Finish weaning away from the
unique alignment requirements for code_gen_buffer.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.h     | 12 +++++-------
 tcg/mips/tcg-target.c.inc | 23 +++++------------------
 2 files changed, 10 insertions(+), 25 deletions(-)

diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index c34cccebd3..28c42e23e1 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -39,11 +39,7 @@
 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
 #define TCG_TARGET_NB_REGS 32
 
-/*
- * We have a 256MB branch region, but leave room to make sure the
- * main executable is also within that region.
- */
-#define MAX_CODE_GEN_BUFFER_SIZE  (128 * MiB)
+#define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
 
 typedef enum {
     TCG_REG_ZERO = 0,
@@ -136,7 +132,7 @@ extern bool use_mips32r2_instructions;
 #define TCG_TARGET_HAS_muluh_i32        1
 #define TCG_TARGET_HAS_mulsh_i32        1
 #define TCG_TARGET_HAS_bswap32_i32      1
-#define TCG_TARGET_HAS_direct_jump      1
+#define TCG_TARGET_HAS_direct_jump      0
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_add2_i32         0
@@ -207,7 +203,9 @@ extern bool use_mips32r2_instructions;
 #define TCG_TARGET_DEFAULT_MO (0)
 #define TCG_TARGET_HAS_MEMORY_BSWAP     1
 
-void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+/* not defined -- call should be eliminated at compile time */
+void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t)
+    QEMU_ERROR("code path is reachable");
 
 #define TCG_TARGET_NEED_LDST_LABELS
 
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index c65c4ee1f8..1c5c0854c7 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -1977,17 +1977,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         }
         break;
     case INDEX_op_goto_tb:
-        if (s->tb_jmp_insn_offset) {
-            /* direct jump method */
-            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
-            /* Avoid clobbering the address during retranslation.  */
-            tcg_out32(s, OPC_J | (*(uint32_t *)s->code_ptr & 0x3ffffff));
-        } else {
-            /* indirect jump method */
-            tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO,
-                       (uintptr_t)(s->tb_jmp_target_addr + a0));
-            tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0);
-        }
+        /* indirect jump method */
+        tcg_debug_assert(s->tb_jmp_insn_offset == 0);
+        tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO,
+                   (uintptr_t)(s->tb_jmp_target_addr + a0));
+        tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0);
         tcg_out_nop(s);
         set_jmp_reset_offset(s, a0);
         break;
@@ -2861,13 +2855,6 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP);   /* global pointer */
 }
 
-void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
-                              uintptr_t jmp_rw, uintptr_t addr)
-{
-    qatomic_set((uint32_t *)jmp_rw, deposit32(OPC_J, 0, 26, addr >> 2));
-    flush_idcache_range(jmp_rx, jmp_rw, 4);
-}
-
 typedef struct {
     DebugFrameHeader h;
     uint8_t fde_def_cfa[4];
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 09/16] tcg/mips: Drop special alignment for code_gen_buffer
  2021-08-18 20:19 [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Richard Henderson
                   ` (7 preceding siblings ...)
  2021-08-18 20:19 ` [PATCH v3 08/16] tcg/mips: Unset TCG_TARGET_HAS_direct_jump Richard Henderson
@ 2021-08-18 20:19 ` Richard Henderson
  2021-08-18 22:19   ` Philippe Mathieu-Daudé
  2021-08-18 20:19 ` [PATCH v3 10/16] tcg/mips: Create and use TCG_REG_TB Richard Henderson
                   ` (7 subsequent siblings)
  16 siblings, 1 reply; 27+ messages in thread
From: Richard Henderson @ 2021-08-18 20:19 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/region.c | 91 ----------------------------------------------------
 1 file changed, 91 deletions(-)

diff --git a/tcg/region.c b/tcg/region.c
index e64c3ea230..9cc30d4922 100644
--- a/tcg/region.c
+++ b/tcg/region.c
@@ -467,38 +467,6 @@ static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus)
   (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
    ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
 
-#ifdef __mips__
-/*
- * In order to use J and JAL within the code_gen_buffer, we require
- * that the buffer not cross a 256MB boundary.
- */
-static inline bool cross_256mb(void *addr, size_t size)
-{
-    return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & ~0x0ffffffful;
-}
-
-/*
- * We weren't able to allocate a buffer without crossing that boundary,
- * so make do with the larger portion of the buffer that doesn't cross.
- * Returns the new base and size of the buffer in *obuf and *osize.
- */
-static inline void split_cross_256mb(void **obuf, size_t *osize,
-                                     void *buf1, size_t size1)
-{
-    void *buf2 = (void *)(((uintptr_t)buf1 + size1) & ~0x0ffffffful);
-    size_t size2 = buf1 + size1 - buf2;
-
-    size1 = buf2 - buf1;
-    if (size1 < size2) {
-        size1 = size2;
-        buf1 = buf2;
-    }
-
-    *obuf = buf1;
-    *osize = size1;
-}
-#endif
-
 #ifdef USE_STATIC_CODE_GEN_BUFFER
 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
     __attribute__((aligned(CODE_GEN_ALIGN)));
@@ -526,12 +494,6 @@ static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
         size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size);
     }
 
-#ifdef __mips__
-    if (cross_256mb(buf, size)) {
-        split_cross_256mb(&buf, &size, buf, size);
-    }
-#endif
-
     region.start_aligned = buf;
     region.total_size = size;
 
@@ -573,39 +535,6 @@ static int alloc_code_gen_buffer_anon(size_t size, int prot,
         return -1;
     }
 
-#ifdef __mips__
-    if (cross_256mb(buf, size)) {
-        /*
-         * Try again, with the original still mapped, to avoid re-acquiring
-         * the same 256mb crossing.
-         */
-        size_t size2;
-        void *buf2 = mmap(NULL, size, prot, flags, -1, 0);
-        switch ((int)(buf2 != MAP_FAILED)) {
-        case 1:
-            if (!cross_256mb(buf2, size)) {
-                /* Success!  Use the new buffer.  */
-                munmap(buf, size);
-                break;
-            }
-            /* Failure.  Work with what we had.  */
-            munmap(buf2, size);
-            /* fallthru */
-        default:
-            /* Split the original buffer.  Free the smaller half.  */
-            split_cross_256mb(&buf2, &size2, buf, size);
-            if (buf == buf2) {
-                munmap(buf + size2, size - size2);
-            } else {
-                munmap(buf, size - size2);
-            }
-            size = size2;
-            break;
-        }
-        buf = buf2;
-    }
-#endif
-
     region.start_aligned = buf;
     region.total_size = size;
     return prot;
@@ -620,35 +549,15 @@ static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
     void *buf_rw = NULL, *buf_rx = MAP_FAILED;
     int fd = -1;
 
-#ifdef __mips__
-    /* Find space for the RX mapping, vs the 256MiB regions. */
-    if (alloc_code_gen_buffer_anon(size, PROT_NONE,
-                                   MAP_PRIVATE | MAP_ANONYMOUS |
-                                   MAP_NORESERVE, errp) < 0) {
-        return false;
-    }
-    /* The size of the mapping may have been adjusted. */
-    buf_rx = region.start_aligned;
-    size = region.total_size;
-#endif
-
     buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
     if (buf_rw == NULL) {
         goto fail;
     }
 
-#ifdef __mips__
-    void *tmp = mmap(buf_rx, size, PROT_READ | PROT_EXEC,
-                     MAP_SHARED | MAP_FIXED, fd, 0);
-    if (tmp != buf_rx) {
-        goto fail_rx;
-    }
-#else
     buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
     if (buf_rx == MAP_FAILED) {
         goto fail_rx;
     }
-#endif
 
     close(fd);
     region.start_aligned = buf_rw;
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 10/16] tcg/mips: Create and use TCG_REG_TB
  2021-08-18 20:19 [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Richard Henderson
                   ` (8 preceding siblings ...)
  2021-08-18 20:19 ` [PATCH v3 09/16] tcg/mips: Drop special alignment for code_gen_buffer Richard Henderson
@ 2021-08-18 20:19 ` Richard Henderson
  2021-08-18 20:19 ` [PATCH v3 11/16] tcg/mips: Split out tcg_out_movi_one Richard Henderson
                   ` (6 subsequent siblings)
  16 siblings, 0 replies; 27+ messages in thread
From: Richard Henderson @ 2021-08-18 20:19 UTC (permalink / raw)
  To: qemu-devel

This vastly reduces the size of code generated for 64-bit addresses.
The code for exit_tb, for instance, where we load a (tagged) pointer
to the current TB, goes from

0x400aa9725c:  li       v0,64
0x400aa97260:  dsll     v0,v0,0x10
0x400aa97264:  ori      v0,v0,0xaa9
0x400aa97268:  dsll     v0,v0,0x10
0x400aa9726c:  j        0x400aa9703c
0x400aa97270:  ori      v0,v0,0x7083

to

0x400aa97240:  j        0x400aa97040
0x400aa97244:  daddiu   v0,s6,-189

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.c.inc | 73 ++++++++++++++++++++++++++++++++-------
 1 file changed, 61 insertions(+), 12 deletions(-)

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 1c5c0854c7..333b9572d0 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -88,6 +88,11 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 #ifndef CONFIG_SOFTMMU
 #define TCG_GUEST_BASE_REG TCG_REG_S7
 #endif
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_REG_TB         TCG_REG_S6
+#else
+#define TCG_REG_TB         (qemu_build_not_reached(), TCG_REG_ZERO)
+#endif
 
 /* check if we really need so many registers :P */
 static const int tcg_target_reg_alloc_order[] = {
@@ -1961,34 +1966,72 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     switch (opc) {
     case INDEX_op_exit_tb:
         {
-            TCGReg b0 = TCG_REG_ZERO;
+            TCGReg base = TCG_REG_ZERO;
+            int16_t lo = 0;
 
-            a0 = (intptr_t)a0;
-            if (a0 & ~0xffff) {
-                tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, a0 & ~0xffff);
-                b0 = TCG_REG_V0;
+            if (a0) {
+                intptr_t ofs;
+                if (TCG_TARGET_REG_BITS == 64) {
+                    ofs = tcg_tbrel_diff(s, (void *)a0);
+                    lo = ofs;
+                    if (ofs == lo) {
+                        base = TCG_REG_TB;
+                    } else {
+                        base = TCG_REG_V0;
+                        tcg_out_movi(s, TCG_TYPE_PTR, base, ofs - lo);
+                        tcg_out_opc_reg(s, ALIAS_PADD, base, base, TCG_REG_TB);
+                    }
+                } else {
+                    ofs = a0;
+                    lo = ofs;
+                    base = TCG_REG_V0;
+                    tcg_out_movi(s, TCG_TYPE_PTR, base, ofs - lo);
+                }
             }
             if (!tcg_out_opc_jmp(s, OPC_J, tb_ret_addr)) {
                 tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0,
                              (uintptr_t)tb_ret_addr);
                 tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0);
             }
-            tcg_out_opc_imm(s, OPC_ORI, TCG_REG_V0, b0, a0 & 0xffff);
+            tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_V0, base, lo);
         }
         break;
     case INDEX_op_goto_tb:
         /* indirect jump method */
         tcg_debug_assert(s->tb_jmp_insn_offset == 0);
-        tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO,
-                   (uintptr_t)(s->tb_jmp_target_addr + a0));
-        tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0);
-        tcg_out_nop(s);
-        set_jmp_reset_offset(s, a0);
+        {
+            TCGReg base, dest;
+            intptr_t ofs;
+
+            if (TCG_TARGET_REG_BITS == 64) {
+                dest = base = TCG_REG_TB;
+                ofs = tcg_tbrel_diff(s, s->tb_jmp_target_addr + a0);
+            } else {
+                dest = TCG_TMP0;
+                base = TCG_REG_ZERO;
+                ofs = (intptr_t)(s->tb_jmp_target_addr + a0);
+            }
+            tcg_out_ld(s, TCG_TYPE_PTR, dest, base, ofs);
+            tcg_out_opc_reg(s, OPC_JR, 0, dest, 0);
+            /* delay slot */
+            tcg_out_nop(s);
+
+            set_jmp_reset_offset(s, args[0]);
+            if (TCG_TARGET_REG_BITS == 64) {
+                /* For the unlinked case, need to reset TCG_REG_TB. */
+                tcg_out_ldst(s, ALIAS_PADDI, TCG_REG_TB, TCG_REG_TB,
+                             -tcg_current_code_size(s));
+            }
+        }
         break;
     case INDEX_op_goto_ptr:
         /* jmp to the given host address (could be epilogue) */
         tcg_out_opc_reg(s, OPC_JR, 0, a0, 0);
-        tcg_out_nop(s);
+        if (TCG_TARGET_REG_BITS == 64) {
+            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, a0);
+        } else {
+            tcg_out_nop(s);
+        }
         break;
     case INDEX_op_br:
         tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO,
@@ -2672,6 +2715,9 @@ static void tcg_target_qemu_prologue(TCGContext *s)
         tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
     }
 #endif
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
+    }
 
     /* Call generated code */
     tcg_out_opc_reg(s, OPC_JR, 0, tcg_target_call_iarg_regs[1], 0);
@@ -2853,6 +2899,9 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA);   /* return address */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);   /* stack pointer */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP);   /* global pointer */
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); /* tc->tc_ptr */
+    }
 }
 
 typedef struct {
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 11/16] tcg/mips: Split out tcg_out_movi_one
  2021-08-18 20:19 [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Richard Henderson
                   ` (9 preceding siblings ...)
  2021-08-18 20:19 ` [PATCH v3 10/16] tcg/mips: Create and use TCG_REG_TB Richard Henderson
@ 2021-08-18 20:19 ` Richard Henderson
  2021-08-18 20:19 ` [PATCH v3 12/16] tcg/mips: Split out tcg_out_movi_two Richard Henderson
                   ` (5 subsequent siblings)
  16 siblings, 0 replies; 27+ messages in thread
From: Richard Henderson @ 2021-08-18 20:19 UTC (permalink / raw)
  To: qemu-devel

Emit all constants that can be loaded in exactly one insn.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.c.inc | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 333b9572d0..b32edd5a7a 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -524,20 +524,34 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
     return true;
 }
 
+static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
+{
+    if (arg == (int16_t)arg) {
+        tcg_out_opc_imm(s, OPC_ADDIU, ret, TCG_REG_ZERO, arg);
+        return true;
+    }
+    if (arg == (uint16_t)arg) {
+        tcg_out_opc_imm(s, OPC_ORI, ret, TCG_REG_ZERO, arg);
+        return true;
+    }
+    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
+        tcg_out_opc_imm(s, OPC_LUI, ret, TCG_REG_ZERO, arg >> 16);
+        return true;
+    }
+    return false;
+}
+
 static void tcg_out_movi(TCGContext *s, TCGType type,
                          TCGReg ret, tcg_target_long arg)
 {
     if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
         arg = (int32_t)arg;
     }
-    if (arg == (int16_t)arg) {
-        tcg_out_opc_imm(s, OPC_ADDIU, ret, TCG_REG_ZERO, arg);
-        return;
-    }
-    if (arg == (uint16_t)arg) {
-        tcg_out_opc_imm(s, OPC_ORI, ret, TCG_REG_ZERO, arg);
+
+    if (tcg_out_movi_one(s, ret, arg)) {
         return;
     }
+
     if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
         tcg_out_opc_imm(s, OPC_LUI, ret, TCG_REG_ZERO, arg >> 16);
     } else {
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 12/16] tcg/mips: Split out tcg_out_movi_two
  2021-08-18 20:19 [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Richard Henderson
                   ` (10 preceding siblings ...)
  2021-08-18 20:19 ` [PATCH v3 11/16] tcg/mips: Split out tcg_out_movi_one Richard Henderson
@ 2021-08-18 20:19 ` Richard Henderson
  2021-08-18 20:19 ` [PATCH v3 13/16] tcg/mips: Use the constant pool for 64-bit constants Richard Henderson
                   ` (4 subsequent siblings)
  16 siblings, 0 replies; 27+ messages in thread
From: Richard Henderson @ 2021-08-18 20:19 UTC (permalink / raw)
  To: qemu-devel

Emit all 32-bit signed constants, which can be loaded in two insns.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.c.inc | 35 ++++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index b32edd5a7a..d351d53a7b 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -541,6 +541,22 @@ static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
     return false;
 }
 
+static bool tcg_out_movi_two(TCGContext *s, TCGReg ret, tcg_target_long arg)
+{
+    /*
+     * All signed 32-bit constants are loadable with two immediates,
+     * and everything else requires more work.
+     */
+    if (arg == (int32_t)arg) {
+        if (!tcg_out_movi_one(s, ret, arg)) {
+            tcg_out_opc_imm(s, OPC_LUI, ret, TCG_REG_ZERO, arg >> 16);
+            tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg & 0xffff);
+        }
+        return true;
+    }
+    return false;
+}
+
 static void tcg_out_movi(TCGContext *s, TCGType type,
                          TCGReg ret, tcg_target_long arg)
 {
@@ -548,21 +564,18 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
         arg = (int32_t)arg;
     }
 
-    if (tcg_out_movi_one(s, ret, arg)) {
+    /* Load all 32-bit constants. */
+    if (tcg_out_movi_two(s, ret, arg)) {
         return;
     }
 
-    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
-        tcg_out_opc_imm(s, OPC_LUI, ret, TCG_REG_ZERO, arg >> 16);
+    tcg_out_movi(s, TCG_TYPE_I32, ret, arg >> 31 >> 1);
+    if (arg & 0xffff0000ull) {
+        tcg_out_dsll(s, ret, ret, 16);
+        tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg >> 16);
+        tcg_out_dsll(s, ret, ret, 16);
     } else {
-        tcg_out_movi(s, TCG_TYPE_I32, ret, arg >> 31 >> 1);
-        if (arg & 0xffff0000ull) {
-            tcg_out_dsll(s, ret, ret, 16);
-            tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg >> 16);
-            tcg_out_dsll(s, ret, ret, 16);
-        } else {
-            tcg_out_dsll(s, ret, ret, 32);
-        }
+        tcg_out_dsll(s, ret, ret, 32);
     }
     if (arg & 0xffff) {
         tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg & 0xffff);
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 13/16] tcg/mips: Use the constant pool for 64-bit constants
  2021-08-18 20:19 [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Richard Henderson
                   ` (11 preceding siblings ...)
  2021-08-18 20:19 ` [PATCH v3 12/16] tcg/mips: Split out tcg_out_movi_two Richard Henderson
@ 2021-08-18 20:19 ` Richard Henderson
  2021-08-18 20:19 ` [PATCH v3 14/16] tcg/mips: Aggressively use the constant pool for n64 calls Richard Henderson
                   ` (3 subsequent siblings)
  16 siblings, 0 replies; 27+ messages in thread
From: Richard Henderson @ 2021-08-18 20:19 UTC (permalink / raw)
  To: qemu-devel

During normal processing, the constant pool is accessible via
TCG_REG_TB.  During the prologue, it is accessible via TCG_REG_T9.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.h     |  1 +
 tcg/mips/tcg-target.c.inc | 65 +++++++++++++++++++++++++++++----------
 2 files changed, 49 insertions(+), 17 deletions(-)

diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 28c42e23e1..839364b493 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -208,5 +208,6 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t)
     QEMU_ERROR("code path is reachable");
 
 #define TCG_TARGET_NEED_LDST_LABELS
+#define TCG_TARGET_NEED_POOL_LABELS
 
 #endif
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index d351d53a7b..2ab37ac7c0 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -25,6 +25,7 @@
  */
 
 #include "../tcg-ldst.c.inc"
+#include "../tcg-pool.c.inc"
 
 #ifdef HOST_WORDS_BIGENDIAN
 # define MIPS_BE  1
@@ -166,9 +167,18 @@ static bool reloc_pc16(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
-    tcg_debug_assert(type == R_MIPS_PC16);
-    tcg_debug_assert(addend == 0);
-    return reloc_pc16(code_ptr, (const tcg_insn_unit *)value);
+    value += addend;
+    switch (type) {
+    case R_MIPS_PC16:
+        return reloc_pc16(code_ptr, (const tcg_insn_unit *)value);
+    case R_MIPS_16:
+        if (value != (int16_t)value) {
+            return false;
+        }
+        *code_ptr = deposit32(*code_ptr, 0, 16, value);
+        return true;
+    }
+    g_assert_not_reached();
 }
 
 #define TCG_CT_CONST_ZERO 0x100
@@ -500,6 +510,11 @@ static void tcg_out_nop(TCGContext *s)
     tcg_out32(s, 0);
 }
 
+static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
+{
+    memset(p, 0, count * sizeof(tcg_insn_unit));
+}
+
 static void tcg_out_dsll(TCGContext *s, TCGReg rd, TCGReg rt, TCGArg sa)
 {
     tcg_out_opc_sa64(s, OPC_DSLL, OPC_DSLL32, rd, rt, sa);
@@ -557,8 +572,15 @@ static bool tcg_out_movi_two(TCGContext *s, TCGReg ret, tcg_target_long arg)
     return false;
 }
 
-static void tcg_out_movi(TCGContext *s, TCGType type,
-                         TCGReg ret, tcg_target_long arg)
+static void tcg_out_movi_pool(TCGContext *s, TCGReg ret,
+                              tcg_target_long arg, TCGReg tbreg)
+{
+    new_pool_label(s, arg, R_MIPS_16, s->code_ptr, tcg_tbrel_diff(s, NULL));
+    tcg_out_opc_imm(s, OPC_LD, ret, tbreg, 0);
+}
+
+static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
+                             tcg_target_long arg, TCGReg tbreg)
 {
     if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
         arg = (int32_t)arg;
@@ -568,18 +590,17 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
     if (tcg_out_movi_two(s, ret, arg)) {
         return;
     }
+    assert(TCG_TARGET_REG_BITS == 64);
 
-    tcg_out_movi(s, TCG_TYPE_I32, ret, arg >> 31 >> 1);
-    if (arg & 0xffff0000ull) {
-        tcg_out_dsll(s, ret, ret, 16);
-        tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg >> 16);
-        tcg_out_dsll(s, ret, ret, 16);
-    } else {
-        tcg_out_dsll(s, ret, ret, 32);
-    }
-    if (arg & 0xffff) {
-        tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg & 0xffff);
-    }
+    /* Otherwise, put 64-bit constants into the constant pool. */
+    tcg_out_movi_pool(s, ret, arg, tbreg);
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type,
+                         TCGReg ret, tcg_target_long arg)
+{
+    TCGReg tbreg = TCG_TARGET_REG_BITS == 64 ? TCG_REG_TB : 0;
+    tcg_out_movi_int(s, type, ret, arg, tbreg);
 }
 
 static void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg, int flags)
@@ -2738,10 +2759,20 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 
 #ifndef CONFIG_SOFTMMU
     if (guest_base != (int16_t)guest_base) {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
+        /*
+         * The function call abi for n32 and n64 will have loaded $25 (t9)
+         * with the address of the prologue, so we can use that instead
+         * of TCG_REG_TB.
+         */
+#if TCG_TARGET_REG_BITS == 64 && !defined(__mips_abicalls)
+# error "Unknown mips abi"
+#endif
+        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base,
+                         TCG_TARGET_REG_BITS == 64 ? TCG_REG_T9 : 0);
         tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
     }
 #endif
+
     if (TCG_TARGET_REG_BITS == 64) {
         tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
     }
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 14/16] tcg/mips: Aggressively use the constant pool for n64 calls
  2021-08-18 20:19 [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Richard Henderson
                   ` (12 preceding siblings ...)
  2021-08-18 20:19 ` [PATCH v3 13/16] tcg/mips: Use the constant pool for 64-bit constants Richard Henderson
@ 2021-08-18 20:19 ` Richard Henderson
  2021-08-18 20:19 ` [PATCH v3 15/16] tcg/mips: Try tb-relative addresses in tcg_out_movi Richard Henderson
                   ` (2 subsequent siblings)
  16 siblings, 0 replies; 27+ messages in thread
From: Richard Henderson @ 2021-08-18 20:19 UTC (permalink / raw)
  To: qemu-devel

Repeated calls to a single helper are common -- especially
the ones for softmmu memory access.  Prefer the constant pool
to longer sequences to increase sharing.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.c.inc | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 2ab37ac7c0..f641d86561 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -1057,9 +1057,19 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
 
 static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
 {
-    /* Note that the ABI requires the called function's address to be
-       loaded into T9, even if a direct branch is in range.  */
-    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T9, (uintptr_t)arg);
+    /*
+     * Note that __mips_abicalls requires the called function's address
+     * to be loaded into $25 (t9), even if a direct branch is in range.
+     *
+     * For n64, always drop the pointer into the constant pool.
+     * We can re-use helper addresses often and do not want any
+     * of the longer sequences tcg_out_movi may try.
+     */
+    if (sizeof(uintptr_t) == 8) {
+        tcg_out_movi_pool(s, TCG_REG_T9, (uintptr_t)arg, TCG_REG_TB);
+    } else {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T9, (uintptr_t)arg);
+    }
 
     /* But do try a direct branch, allowing the cpu better insn prefetch.  */
     if (tail) {
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 15/16] tcg/mips: Try tb-relative addresses in tcg_out_movi
  2021-08-18 20:19 [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Richard Henderson
                   ` (13 preceding siblings ...)
  2021-08-18 20:19 ` [PATCH v3 14/16] tcg/mips: Aggressively use the constant pool for n64 calls Richard Henderson
@ 2021-08-18 20:19 ` Richard Henderson
  2021-08-18 20:19 ` [PATCH v3 16/16] tcg/mips: Try three insns with shift and add " Richard Henderson
  2021-08-18 22:07 ` [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Philippe Mathieu-Daudé
  16 siblings, 0 replies; 27+ messages in thread
From: Richard Henderson @ 2021-08-18 20:19 UTC (permalink / raw)
  To: qemu-devel

These addresses are often loaded by the qemu_ld/st slow path,
for loading the retaddr value.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.c.inc | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index f641d86561..27a23662c8 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -582,6 +582,8 @@ static void tcg_out_movi_pool(TCGContext *s, TCGReg ret,
 static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                              tcg_target_long arg, TCGReg tbreg)
 {
+    tcg_target_long tmp;
+
     if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
         arg = (int32_t)arg;
     }
@@ -592,6 +594,17 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
     }
     assert(TCG_TARGET_REG_BITS == 64);
 
+    /* Load addresses within 2GB of TB with 1 or 3 insns. */
+    tmp = tcg_tbrel_diff(s, (void *)arg);
+    if (tmp == (int16_t)tmp) {
+        tcg_out_opc_imm(s, OPC_DADDIU, ret, tbreg, tmp);
+        return;
+    }
+    if (tcg_out_movi_two(s, ret, tmp)) {
+        tcg_out_opc_reg(s, OPC_DADDU, ret, ret, tbreg);
+        return;
+    }
+
     /* Otherwise, put 64-bit constants into the constant pool. */
     tcg_out_movi_pool(s, ret, arg, tbreg);
 }
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v3 16/16] tcg/mips: Try three insns with shift and add in tcg_out_movi
  2021-08-18 20:19 [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Richard Henderson
                   ` (14 preceding siblings ...)
  2021-08-18 20:19 ` [PATCH v3 15/16] tcg/mips: Try tb-relative addresses in tcg_out_movi Richard Henderson
@ 2021-08-18 20:19 ` Richard Henderson
  2021-08-18 22:07 ` [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Philippe Mathieu-Daudé
  16 siblings, 0 replies; 27+ messages in thread
From: Richard Henderson @ 2021-08-18 20:19 UTC (permalink / raw)
  To: qemu-devel

These sequences are inexpensive to test.  Maxing out at three insns
results in the same space as a load plus the constant pool entry.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.c.inc | 44 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 27a23662c8..92f324de68 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -583,6 +583,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                              tcg_target_long arg, TCGReg tbreg)
 {
     tcg_target_long tmp;
+    int sh, lo;
 
     if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
         arg = (int32_t)arg;
@@ -605,6 +606,49 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
         return;
     }
 
+    /*
+     * Load bitmasks with a right-shift.  This is good for things
+     * like 0x0fff_ffff_ffff_fff0: ADDUI r,0xff00 + DSRL r,r,4.
+     * or similarly using LUI.  For this to work, bit 31 must be set.
+     */
+    if (arg > 0 && (int32_t)arg < 0) {
+        sh = clz64(arg);
+        if (tcg_out_movi_one(s, ret, arg << sh)) {
+            tcg_out_dsrl(s, ret, ret, sh);
+            return;
+        }
+    }
+
+    /*
+     * Load slightly larger constants using left-shift.
+     * Limit this sequence to 3 insns to avoid too much expansion.
+     */
+    sh = ctz64(arg);
+    if (sh && tcg_out_movi_two(s, ret, arg >> sh)) {
+        tcg_out_dsll(s, ret, ret, sh);
+        return;
+    }
+
+    /*
+     * Load slightly larger constants using left-shift and add/or.
+     * Prefer addi with a negative immediate when that would produce
+     * a larger shift.  For this to work, bits 15 and 16 must be set.
+     */
+    lo = arg & 0xffff;
+    if (lo) {
+        if ((arg & 0x18000) == 0x18000) {
+            lo = (int16_t)arg;
+        }
+        tmp = arg - lo;
+        sh = ctz64(tmp);
+        tmp >>= sh;
+        if (tcg_out_movi_one(s, ret, tmp)) {
+            tcg_out_dsll(s, ret, ret, sh);
+            tcg_out_opc_imm(s, lo < 0 ? OPC_DADDIU : OPC_ORI, ret, ret, lo);
+            return;
+        }
+    }
+
     /* Otherwise, put 64-bit constants into the constant pool. */
     tcg_out_movi_pool(s, ret, arg, tbreg);
 }
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 03/16] tcg/mips: Drop inline markers
  2021-08-18 20:19 ` [PATCH v3 03/16] tcg/mips: Drop inline markers Richard Henderson
@ 2021-08-18 21:03   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 27+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-08-18 21:03 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel

On 8/18/21 10:19 PM, Richard Henderson wrote:
> Let the compiler decide about inlining.
> Remove tcg_out_ext8s and tcg_out_ext16s as unused.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  tcg/mips/tcg-target.c.inc | 76 ++++++++++++++-------------------------
>  1 file changed, 27 insertions(+), 49 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup
  2021-08-18 20:19 [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Richard Henderson
                   ` (15 preceding siblings ...)
  2021-08-18 20:19 ` [PATCH v3 16/16] tcg/mips: Try three insns with shift and add " Richard Henderson
@ 2021-08-18 22:07 ` Philippe Mathieu-Daudé
  2021-08-18 22:09   ` Philippe Mathieu-Daudé
  16 siblings, 1 reply; 27+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-08-18 22:07 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: Huacai Chen

Cc'ing Jiaxun & Huacai.

On 8/18/21 10:19 PM, Richard Henderson wrote:
> Based-on: <20210818191920.390759-1-richard.henderson@linaro.org>
> ("[PATCH v3 00/66] Unaligned access for user-only")
> 
> Important points:
>   * Support unaligned accesses.
>   * Drop requirement for 256MB alignment of code_gen_buffer.
>   * Improvements to tcg_out_movi:
>     - Have a tb-relative register for mips64, reducing the
>       code size for most pointers,
>     - Try a few 3-insn sequences,
>     - Drop everything else into a constant pool.
> 
> 
> r~
> 
> 
> Richard Henderson (16):
>   tcg/mips: Support unaligned access for user-only
>   tcg/mips: Support unaligned access for softmmu
>   tcg/mips: Drop inline markers
>   tcg/mips: Move TCG_AREG0 to S8
>   tcg/mips: Move TCG_GUEST_BASE_REG to S7
>   tcg/mips: Unify TCG_GUEST_BASE_REG tests
>   tcg/mips: Allow JAL to be out of range in tcg_out_bswap_subr
>   tcg/mips: Unset TCG_TARGET_HAS_direct_jump
>   tcg/mips: Drop special alignment for code_gen_buffer
>   tcg/mips: Create and use TCG_REG_TB
>   tcg/mips: Split out tcg_out_movi_one
>   tcg/mips: Split out tcg_out_movi_two
>   tcg/mips: Use the constant pool for 64-bit constants
>   tcg/mips: Aggressively use the constant pool for n64 calls
>   tcg/mips: Try tb-relative addresses in tcg_out_movi
>   tcg/mips: Try three insns with shift and add in tcg_out_movi
> 
>  tcg/mips/tcg-target.h     |  17 +-
>  tcg/region.c              |  91 -----
>  tcg/mips/tcg-target.c.inc | 730 +++++++++++++++++++++++++++++++-------
>  3 files changed, 604 insertions(+), 234 deletions(-)
> 



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup
  2021-08-18 22:07 ` [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Philippe Mathieu-Daudé
@ 2021-08-18 22:09   ` Philippe Mathieu-Daudé
  2021-08-20  7:16     ` Huacai Chen
  0 siblings, 1 reply; 27+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-08-18 22:09 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel@nongnu.org Developers; +Cc: Huacai Chen

Sorry, use Huacai's newer email .

On Thu, Aug 19, 2021 at 12:07 AM Philippe Mathieu-Daudé <f4bug@amsat.org> wrote:
>
> Cc'ing Jiaxun & Huacai.
>
> On 8/18/21 10:19 PM, Richard Henderson wrote:
> > Based-on: <20210818191920.390759-1-richard.henderson@linaro.org>
> > ("[PATCH v3 00/66] Unaligned access for user-only")
> >
> > Important points:
> >   * Support unaligned accesses.
> >   * Drop requirement for 256MB alignment of code_gen_buffer.
> >   * Improvements to tcg_out_movi:
> >     - Have a tb-relative register for mips64, reducing the
> >       code size for most pointers,
> >     - Try a few 3-insn sequences,
> >     - Drop everything else into a constant pool.
> >
> >
> > r~
> >
> >
> > Richard Henderson (16):
> >   tcg/mips: Support unaligned access for user-only
> >   tcg/mips: Support unaligned access for softmmu
> >   tcg/mips: Drop inline markers
> >   tcg/mips: Move TCG_AREG0 to S8
> >   tcg/mips: Move TCG_GUEST_BASE_REG to S7
> >   tcg/mips: Unify TCG_GUEST_BASE_REG tests
> >   tcg/mips: Allow JAL to be out of range in tcg_out_bswap_subr
> >   tcg/mips: Unset TCG_TARGET_HAS_direct_jump
> >   tcg/mips: Drop special alignment for code_gen_buffer
> >   tcg/mips: Create and use TCG_REG_TB
> >   tcg/mips: Split out tcg_out_movi_one
> >   tcg/mips: Split out tcg_out_movi_two
> >   tcg/mips: Use the constant pool for 64-bit constants
> >   tcg/mips: Aggressively use the constant pool for n64 calls
> >   tcg/mips: Try tb-relative addresses in tcg_out_movi
> >   tcg/mips: Try three insns with shift and add in tcg_out_movi
> >
> >  tcg/mips/tcg-target.h     |  17 +-
> >  tcg/region.c              |  91 -----
> >  tcg/mips/tcg-target.c.inc | 730 +++++++++++++++++++++++++++++++-------
> >  3 files changed, 604 insertions(+), 234 deletions(-)
> >
>


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 04/16] tcg/mips: Move TCG_AREG0 to S8
  2021-08-18 20:19 ` [PATCH v3 04/16] tcg/mips: Move TCG_AREG0 to S8 Richard Henderson
@ 2021-08-18 22:10   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 27+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-08-18 22:10 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel

On 8/18/21 10:19 PM, Richard Henderson wrote:
> No functional change; just moving the saved reserved regs to the end.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  tcg/mips/tcg-target.h     | 2 +-
>  tcg/mips/tcg-target.c.inc | 4 ++--
>  2 files changed, 3 insertions(+), 3 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 05/16] tcg/mips: Move TCG_GUEST_BASE_REG to S7
  2021-08-18 20:19 ` [PATCH v3 05/16] tcg/mips: Move TCG_GUEST_BASE_REG to S7 Richard Henderson
@ 2021-08-18 22:11   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 27+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-08-18 22:11 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel

On 8/18/21 10:19 PM, Richard Henderson wrote:
> No functional change; just moving the saved reserved regs to the end.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  tcg/mips/tcg-target.c.inc | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
> index 92bde50704..b3a2cc88ab 100644
> --- a/tcg/mips/tcg-target.c.inc
> +++ b/tcg/mips/tcg-target.c.inc
> @@ -86,7 +86,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
>  #define TCG_TMP3  TCG_REG_T7
>  
>  #ifndef CONFIG_SOFTMMU
> -#define TCG_GUEST_BASE_REG TCG_REG_S1
> +#define TCG_GUEST_BASE_REG TCG_REG_S7
>  #endif

Maybe add a comment in tcg_target_callee_save_regs[]?

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 08/16] tcg/mips: Unset TCG_TARGET_HAS_direct_jump
  2021-08-18 20:19 ` [PATCH v3 08/16] tcg/mips: Unset TCG_TARGET_HAS_direct_jump Richard Henderson
@ 2021-08-18 22:17   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 27+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-08-18 22:17 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel

On 8/18/21 10:19 PM, Richard Henderson wrote:
> Only use indirect jumps.  Finish weaning away from the
> unique alignment requirements for code_gen_buffer.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  tcg/mips/tcg-target.h     | 12 +++++-------
>  tcg/mips/tcg-target.c.inc | 23 +++++------------------
>  2 files changed, 10 insertions(+), 25 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 09/16] tcg/mips: Drop special alignment for code_gen_buffer
  2021-08-18 20:19 ` [PATCH v3 09/16] tcg/mips: Drop special alignment for code_gen_buffer Richard Henderson
@ 2021-08-18 22:19   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 27+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-08-18 22:19 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel

On 8/18/21 10:19 PM, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  tcg/region.c | 91 ----------------------------------------------------
>  1 file changed, 91 deletions(-)

Yay!

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup
  2021-08-18 22:09   ` Philippe Mathieu-Daudé
@ 2021-08-20  7:16     ` Huacai Chen
  0 siblings, 0 replies; 27+ messages in thread
From: Huacai Chen @ 2021-08-20  7:16 UTC (permalink / raw)
  To: Philippe Mathieu-Daudé
  Cc: Richard Henderson, qemu-devel@nongnu.org Developers

Hi, Jiaxun,

I'm not familiar with TCG, please review, thanks.

Huacai

On Thu, Aug 19, 2021 at 6:09 AM Philippe Mathieu-Daudé <f4bug@amsat.org> wrote:
>
> Sorry, use Huacai's newer email .
>
> On Thu, Aug 19, 2021 at 12:07 AM Philippe Mathieu-Daudé <f4bug@amsat.org> wrote:
> >
> > Cc'ing Jiaxun & Huacai.
> >
> > On 8/18/21 10:19 PM, Richard Henderson wrote:
> > > Based-on: <20210818191920.390759-1-richard.henderson@linaro.org>
> > > ("[PATCH v3 00/66] Unaligned access for user-only")
> > >
> > > Important points:
> > >   * Support unaligned accesses.
> > >   * Drop requirement for 256MB alignment of code_gen_buffer.
> > >   * Improvements to tcg_out_movi:
> > >     - Have a tb-relative register for mips64, reducing the
> > >       code size for most pointers,
> > >     - Try a few 3-insn sequences,
> > >     - Drop everything else into a constant pool.
> > >
> > >
> > > r~
> > >
> > >
> > > Richard Henderson (16):
> > >   tcg/mips: Support unaligned access for user-only
> > >   tcg/mips: Support unaligned access for softmmu
> > >   tcg/mips: Drop inline markers
> > >   tcg/mips: Move TCG_AREG0 to S8
> > >   tcg/mips: Move TCG_GUEST_BASE_REG to S7
> > >   tcg/mips: Unify TCG_GUEST_BASE_REG tests
> > >   tcg/mips: Allow JAL to be out of range in tcg_out_bswap_subr
> > >   tcg/mips: Unset TCG_TARGET_HAS_direct_jump
> > >   tcg/mips: Drop special alignment for code_gen_buffer
> > >   tcg/mips: Create and use TCG_REG_TB
> > >   tcg/mips: Split out tcg_out_movi_one
> > >   tcg/mips: Split out tcg_out_movi_two
> > >   tcg/mips: Use the constant pool for 64-bit constants
> > >   tcg/mips: Aggressively use the constant pool for n64 calls
> > >   tcg/mips: Try tb-relative addresses in tcg_out_movi
> > >   tcg/mips: Try three insns with shift and add in tcg_out_movi
> > >
> > >  tcg/mips/tcg-target.h     |  17 +-
> > >  tcg/region.c              |  91 -----
> > >  tcg/mips/tcg-target.c.inc | 730 +++++++++++++++++++++++++++++++-------
> > >  3 files changed, 604 insertions(+), 234 deletions(-)
> > >
> >


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 02/16] tcg/mips: Support unaligned access for softmmu
  2021-08-18 20:19 ` [PATCH v3 02/16] tcg/mips: Support unaligned access for softmmu Richard Henderson
@ 2021-08-21  6:39   ` Jiaxun Yang
  0 siblings, 0 replies; 27+ messages in thread
From: Jiaxun Yang @ 2021-08-21  6:39 UTC (permalink / raw)
  To: BALATON Zoltan via



在2021年8月19日八月 上午4:19,Richard Henderson写道:
> We can use the routines just added for user-only to emit
> unaligned accesses in softmmu mode too.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com>

Thanks!

> ---
>  tcg/mips/tcg-target.c.inc | 91 ++++++++++++++++++++++-----------------
>  1 file changed, 51 insertions(+), 40 deletions(-)
> 
> diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
> index 7ed0de9dae..3d6a0ba39e 100644
> --- a/tcg/mips/tcg-target.c.inc
> +++ b/tcg/mips/tcg-target.c.inc
> @@ -1154,8 +1154,10 @@ static void tcg_out_tlb_load(TCGContext *s, 
> TCGReg base, TCGReg addrl,
>                               tcg_insn_unit *label_ptr[2], bool is_load)
>  {
>      MemOp opc = get_memop(oi);
> -    unsigned s_bits = opc & MO_SIZE;
>      unsigned a_bits = get_alignment_bits(opc);
> +    unsigned s_bits = opc & MO_SIZE;
> +    unsigned a_mask = (1 << a_bits) - 1;
> +    unsigned s_mask = (1 << s_bits) - 1;
>      int mem_index = get_mmuidx(oi);
>      int fast_off = TLB_MASK_TABLE_OFS(mem_index);
>      int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
> @@ -1163,7 +1165,7 @@ static void tcg_out_tlb_load(TCGContext *s, 
> TCGReg base, TCGReg addrl,
>      int add_off = offsetof(CPUTLBEntry, addend);
>      int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
>                     : offsetof(CPUTLBEntry, addr_write));
> -    target_ulong mask;
> +    target_ulong tlb_mask;
>  
>      /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
>      tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
> @@ -1177,27 +1179,13 @@ static void tcg_out_tlb_load(TCGContext *s, 
> TCGReg base, TCGReg addrl,
>      /* Add the tlb_table pointer, creating the CPUTLBEntry address in 
> TMP3.  */
>      tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
>  
> -    /* We don't currently support unaligned accesses.
> -       We could do so with mips32r6.  */
> -    if (a_bits < s_bits) {
> -        a_bits = s_bits;
> -    }
> -
> -    /* Mask the page bits, keeping the alignment bits to compare against.  */
> -    mask = (target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
> -
>      /* Load the (low-half) tlb comparator.  */
>      if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
> -        tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
> -        tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, mask);
> +        tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
>      } else {
>          tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD
>                           : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW),
>                       TCG_TMP0, TCG_TMP3, cmp_off);
> -        tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, mask);
> -        /* No second compare is required here;
> -           load the tlb addend for the fast path.  */
> -        tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
>      }
>  
>      /* Zero extend a 32-bit guest address for a 64-bit host. */
> @@ -1205,7 +1193,25 @@ static void tcg_out_tlb_load(TCGContext *s, 
> TCGReg base, TCGReg addrl,
>          tcg_out_ext32u(s, base, addrl);
>          addrl = base;
>      }
> -    tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
> +
> +    /*
> +     * Mask the page bits, keeping the alignment bits to compare 
> against.
> +     * For unaligned accesses, compare against the end of the access to
> +     * verify that it does not cross a page boundary.
> +     */
> +    tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
> +    tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask);
> +    if (a_mask >= s_mask) {
> +        tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
> +    } else {
> +        tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrl, s_mask - 
> a_mask);
> +        tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
> +    }
> +
> +    if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
> +        /* Load the tlb addend for the fast path.  */
> +        tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
> +    }
>  
>      label_ptr[0] = s->code_ptr;
>      tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
> @@ -1213,7 +1219,7 @@ static void tcg_out_tlb_load(TCGContext *s, 
> TCGReg base, TCGReg addrl,
>      /* Load and test the high half tlb comparator.  */
>      if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
>          /* delay slot */
> -        tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_TMP3, cmp_off + 
> HI_OFF);
> +        tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
>  
>          /* Load the tlb addend for the fast path.  */
>          tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
> @@ -1535,8 +1541,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, 
> TCGReg lo, TCGReg hi,
>      }
>  }
>  
> -static void __attribute__((unused))
> -tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
> +static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg 
> hi,
>                                      TCGReg base, MemOp opc, bool is_64)
>  {
>      const MIPSInsn lw1 = MIPS_BE ? OPC_LWL : OPC_LWR;
> @@ -1655,8 +1660,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const 
> TCGArg *args, bool is_64)
>  #if defined(CONFIG_SOFTMMU)
>      tcg_insn_unit *label_ptr[2];
>  #else
> -    unsigned a_bits, s_bits;
>  #endif
> +    unsigned a_bits, s_bits;
>      TCGReg base = TCG_REG_A0;
>  
>      data_regl = *args++;
> @@ -1665,10 +1670,20 @@ static void tcg_out_qemu_ld(TCGContext *s, 
> const TCGArg *args, bool is_64)
>      addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
>      oi = *args++;
>      opc = get_memop(oi);
> +    a_bits = get_alignment_bits(opc);
> +    s_bits = opc & MO_SIZE;
>  
> +    /*
> +     * R6 removes the left/right instructions but requires the
> +     * system to support misaligned memory accesses.
> +     */
>  #if defined(CONFIG_SOFTMMU)
>      tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 1);
> -    tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
> +    if (use_mips32r6_instructions || a_bits >= s_bits) {
> +        tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, 
> is_64);
> +    } else {
> +        tcg_out_qemu_ld_unalign(s, data_regl, data_regh, base, opc, 
> is_64);
> +    }
>      add_qemu_ldst_label(s, 1, oi,
>                          (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
>                          data_regl, data_regh, addr_regl, addr_regh,
> @@ -1685,12 +1700,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const 
> TCGArg *args, bool is_64)
>      } else {
>          tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_GUEST_BASE_REG, 
> addr_regl);
>      }
> -    a_bits = get_alignment_bits(opc);
> -    s_bits = opc & MO_SIZE;
> -    /*
> -     * R6 removes the left/right instructions but requires the
> -     * system to support misaligned memory accesses.
> -     */
>      if (use_mips32r6_instructions) {
>          if (a_bits) {
>              tcg_out_test_alignment(s, true, addr_regl, addr_regh, 
> a_bits);
> @@ -1770,8 +1779,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, 
> TCGReg lo, TCGReg hi,
>      }
>  }
>  
> -static void __attribute__((unused))
> -tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
> +static void tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg 
> hi,
>                                      TCGReg base, MemOp opc)
>  {
>      const MIPSInsn sw1 = MIPS_BE ? OPC_SWL : OPC_SWR;
> @@ -1851,9 +1859,8 @@ static void tcg_out_qemu_st(TCGContext *s, const 
> TCGArg *args, bool is_64)
>      MemOp opc;
>  #if defined(CONFIG_SOFTMMU)
>      tcg_insn_unit *label_ptr[2];
> -#else
> -    unsigned a_bits, s_bits;
>  #endif
> +    unsigned a_bits, s_bits;
>      TCGReg base = TCG_REG_A0;
>  
>      data_regl = *args++;
> @@ -1862,10 +1869,20 @@ static void tcg_out_qemu_st(TCGContext *s, 
> const TCGArg *args, bool is_64)
>      addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
>      oi = *args++;
>      opc = get_memop(oi);
> +    a_bits = get_alignment_bits(opc);
> +    s_bits = opc & MO_SIZE;
>  
> +    /*
> +     * R6 removes the left/right instructions but requires the
> +     * system to support misaligned memory accesses.
> +     */
>  #if defined(CONFIG_SOFTMMU)
>      tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 0);
> -    tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
> +    if (use_mips32r6_instructions || a_bits >= s_bits) {
> +        tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
> +    } else {
> +        tcg_out_qemu_st_unalign(s, data_regl, data_regh, base, opc);
> +    }
>      add_qemu_ldst_label(s, 0, oi,
>                          (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
>                          data_regl, data_regh, addr_regl, addr_regh,
> @@ -1882,12 +1899,6 @@ static void tcg_out_qemu_st(TCGContext *s, const 
> TCGArg *args, bool is_64)
>      } else {
>          tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_GUEST_BASE_REG, 
> addr_regl);
>      }
> -    a_bits = get_alignment_bits(opc);
> -    s_bits = opc & MO_SIZE;
> -    /*
> -     * R6 removes the left/right instructions but requires the
> -     * system to support misaligned memory accesses.
> -     */
>      if (use_mips32r6_instructions) {
>          if (a_bits) {
>              tcg_out_test_alignment(s, true, addr_regl, addr_regh, 
> a_bits);
> -- 
> 2.25.1
> 
> 
> 


-- 
- Jiaxun


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v3 07/16] tcg/mips: Allow JAL to be out of range in tcg_out_bswap_subr
  2021-08-18 20:19 ` [PATCH v3 07/16] tcg/mips: Allow JAL to be out of range in tcg_out_bswap_subr Richard Henderson
@ 2021-09-20 21:14   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 27+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-09-20 21:14 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel

On 8/18/21 22:19, Richard Henderson wrote:
> Weaning off of unique alignment requirements, so allow JAL
> to not reach the target.  TCG_TMP1 is always available for
> use as a scratch because it is clobbered by the subroutine
> being called.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  tcg/mips/tcg-target.c.inc | 6 ++++--
>  1 file changed, 4 insertions(+), 2 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>


^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2021-09-20 21:16 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-18 20:19 [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Richard Henderson
2021-08-18 20:19 ` [PATCH v3 01/16] tcg/mips: Support unaligned access for user-only Richard Henderson
2021-08-18 20:19 ` [PATCH v3 02/16] tcg/mips: Support unaligned access for softmmu Richard Henderson
2021-08-21  6:39   ` Jiaxun Yang
2021-08-18 20:19 ` [PATCH v3 03/16] tcg/mips: Drop inline markers Richard Henderson
2021-08-18 21:03   ` Philippe Mathieu-Daudé
2021-08-18 20:19 ` [PATCH v3 04/16] tcg/mips: Move TCG_AREG0 to S8 Richard Henderson
2021-08-18 22:10   ` Philippe Mathieu-Daudé
2021-08-18 20:19 ` [PATCH v3 05/16] tcg/mips: Move TCG_GUEST_BASE_REG to S7 Richard Henderson
2021-08-18 22:11   ` Philippe Mathieu-Daudé
2021-08-18 20:19 ` [PATCH v3 06/16] tcg/mips: Unify TCG_GUEST_BASE_REG tests Richard Henderson
2021-08-18 20:19 ` [PATCH v3 07/16] tcg/mips: Allow JAL to be out of range in tcg_out_bswap_subr Richard Henderson
2021-09-20 21:14   ` Philippe Mathieu-Daudé
2021-08-18 20:19 ` [PATCH v3 08/16] tcg/mips: Unset TCG_TARGET_HAS_direct_jump Richard Henderson
2021-08-18 22:17   ` Philippe Mathieu-Daudé
2021-08-18 20:19 ` [PATCH v3 09/16] tcg/mips: Drop special alignment for code_gen_buffer Richard Henderson
2021-08-18 22:19   ` Philippe Mathieu-Daudé
2021-08-18 20:19 ` [PATCH v3 10/16] tcg/mips: Create and use TCG_REG_TB Richard Henderson
2021-08-18 20:19 ` [PATCH v3 11/16] tcg/mips: Split out tcg_out_movi_one Richard Henderson
2021-08-18 20:19 ` [PATCH v3 12/16] tcg/mips: Split out tcg_out_movi_two Richard Henderson
2021-08-18 20:19 ` [PATCH v3 13/16] tcg/mips: Use the constant pool for 64-bit constants Richard Henderson
2021-08-18 20:19 ` [PATCH v3 14/16] tcg/mips: Aggressively use the constant pool for n64 calls Richard Henderson
2021-08-18 20:19 ` [PATCH v3 15/16] tcg/mips: Try tb-relative addresses in tcg_out_movi Richard Henderson
2021-08-18 20:19 ` [PATCH v3 16/16] tcg/mips: Try three insns with shift and add " Richard Henderson
2021-08-18 22:07 ` [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup Philippe Mathieu-Daudé
2021-08-18 22:09   ` Philippe Mathieu-Daudé
2021-08-20  7:16     ` Huacai Chen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.