All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: james.hogan@imgtec.com, aurelien@aurel32.net
Subject: [Qemu-devel] [PATCH 10/15] tcg-mips: Move bswap code to subroutines
Date: Tue,  9 Feb 2016 21:39:58 +1100	[thread overview]
Message-ID: <1455014403-10742-11-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1455014403-10742-1-git-send-email-rth@twiddle.net>

Without the mips32r2 / mips64r2 instructions to perform swapping,
the inline 32-bit and 64-bit bswap sequences are quite large.  Move
them to subroutines in the prologue block to minimize code bloat.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/mips/tcg-target.c | 389 ++++++++++++++++++++++++++++++++++----------------
 tcg/mips/tcg-target.h |   6 +-
 2 files changed, 271 insertions(+), 124 deletions(-)

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index b8c5d90..97f9251 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -135,6 +135,9 @@ static const TCGReg tcg_target_call_oarg_regs[2] = {
 };
 
 static tcg_insn_unit *tb_ret_addr;
+static tcg_insn_unit *bswap32s_addr;
+static tcg_insn_unit *bswap32u_addr;
+static tcg_insn_unit *bswap64_addr;
 
 static inline uint32_t reloc_pc16_val(tcg_insn_unit *pc, tcg_insn_unit *target)
 {
@@ -187,6 +190,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
     ct_str = *pct_str;
     switch(ct_str[0]) {
     case 'r':
+    do_default:
         ct->ct |= TCG_CT_REG;
         tcg_regset_set(ct->u.regs, 0xffffffff);
         break;
@@ -208,6 +212,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
     case 'S': /* qemu_st constraint */
         ct->ct |= TCG_CT_REG;
         tcg_regset_set(ct->u.regs, 0xffffffff);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_V0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
 #if defined(CONFIG_SOFTMMU)
         if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
@@ -218,6 +223,22 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         }
 #endif
         break;
+    case 'v': /* bswap output constraint */
+        if (use_mips32r2_instructions) {
+            goto do_default;
+        }
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_clear(ct->u.regs);
+        tcg_regset_set_reg(ct->u.regs, TCG_REG_V0);
+        break;
+    case 'a': /* bswap input constraint */
+        if (use_mips32r2_instructions) {
+            goto do_default;
+        }
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_clear(ct->u.regs);
+        tcg_regset_set_reg(ct->u.regs, TCG_REG_A0);
+        break;
     case 'I':
         ct->ct |= TCG_CT_CONST_U16;
         break;
@@ -618,29 +639,23 @@ static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg)
     }
 }
 
+static void tcg_out_bswap_subr(TCGContext *s, tcg_insn_unit *sub)
+{
+    if (!tcg_out_opc_jmp(s, OPC_JAL, sub)) { /* JAL to SUB; caller fills the delay slot */
+        tcg_abort(); /* tcg_out_opc_jmp returned false/zero: no call was emitted (reason not visible here -- confirm) */
+    }
+}
+
 static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg)
 {
     if (use_mips32r2_instructions) {
         tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
         tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16);
     } else {
-        /* ret and arg must be different and can't be register at */
-        if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0) {
-            tcg_abort();
-        }
-
-        tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24);
-
-        tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 24);
-        tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
-
-        tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00);
-        tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8);
-        tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
-
-        tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8);
-        tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0xff00);
-        tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
+        assert(ret == TCG_REG_V0); /* the subroutine leaves its result in V0 */
+        tcg_out_bswap_subr(s, bswap32s_addr);
+        /* delay slot: copy ARG into A0, the subroutine's input register */
+        tcg_out_opc_reg(s, OPC_OR, TCG_REG_A0, arg, TCG_REG_ZERO);
     }
 }
 
@@ -648,26 +663,13 @@ static inline void tcg_out_bswap32u(TCGContext *s, TCGReg ret, TCGReg arg)
 {
     if (use_mips32r2_instructions) {
         tcg_out_opc_reg(s, OPC_DSBH, ret, 0, arg);
-        tcg_out_opc_reg(s, OPC_DSHD, ret, 0, arg);
+        tcg_out_opc_reg(s, OPC_DSHD, ret, 0, ret);
         tcg_out_dsrl(s, ret, ret, 32);
     } else {
-        /* ret and arg must be different and can't be register at */
-        if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0) {
-            tcg_abort();
-        }
-
-        tcg_out_dsll(s, ret, arg, 24);
-
-        tcg_out_dsrl(s, TCG_TMP0, arg, 24);
-        tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
-
-        tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00);
-        tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 8);
-        tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
-
-        tcg_out_dsrl(s, TCG_TMP0, arg, 8);
-        tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0xff00);
-        tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
+        assert(ret == TCG_REG_V0);
+        tcg_out_bswap_subr(s, bswap32u_addr);
+        /* delay slot */
+        tcg_out_opc_reg(s, OPC_OR, TCG_REG_A0, arg, TCG_REG_ZERO);
     }
 }
 
@@ -677,44 +679,10 @@ static void tcg_out_bswap64(TCGContext *s, TCGReg ret, TCGReg arg)
         tcg_out_opc_reg(s, OPC_DSBH, ret, 0, arg);
         tcg_out_opc_reg(s, OPC_DSHD, ret, 0, arg);
     } else {
-        /* ret and arg must be different and can't be either tmp reg.  */
-        if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0
-            || ret == TCG_TMP1 || arg == TCG_TMP1) {
-            tcg_abort();
-        }
-
-        /* ??? Consider just making this a subroutine.  */
-
-        /* A... ...H -> H... ...A */
-        tcg_out_dsll(s, ret, arg, 56);
-        tcg_out_dsrl(s, TCG_TMP0, arg, 56);
-        tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
-
-        /* .B.. ..G. -> .G.. ..B. */
-        tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00);
-        tcg_out_dsrl(s, TCG_TMP1, arg, 40);
-        tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 40);
-        tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
-        tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
-        tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1);
-
-        /* ..CD .... -> .... DC.. */
-        tcg_out_dsrl(s, TCG_TMP0, arg, 32);
-        tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP0, 0xff00);
-        tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0x00ff);
-        tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 8);
-        tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 24);
-        tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1);
-        tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
-
-        /* .... EF.. -> ..FE .... */
-        tcg_out_dsrl(s, TCG_TMP0, arg, 16);
-        tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP0, 0xff00);
-        tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0x00ff);
-        tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 24);
-        tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 40);
-        tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1);
-        tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
+        assert(ret == TCG_REG_V0);
+        tcg_out_bswap_subr(s, bswap64_addr);
+        /* delay slot */
+        tcg_out_opc_reg(s, OPC_OR, TCG_REG_A0, arg, TCG_REG_ZERO);
     }
 }
 
@@ -1425,72 +1393,111 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 }
 #endif
 
-static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
+static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
                                    TCGReg base, TCGMemOp opc, bool is_64)
 {
+    bool hi_first = MIPS_BE ? hi != base : lo == base;
+
     switch (opc & (MO_SSIZE | MO_BSWAP)) {
     case MO_UB:
-        tcg_out_opc_imm(s, OPC_LBU, datalo, base, 0);
+        tcg_out_opc_imm(s, OPC_LBU, lo, base, 0);
         break;
     case MO_SB:
-        tcg_out_opc_imm(s, OPC_LB, datalo, base, 0);
+        tcg_out_opc_imm(s, OPC_LB, lo, base, 0);
         break;
 
     case MO_UW | MO_BSWAP:
         tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
-        tcg_out_bswap16(s, datalo, TCG_TMP1);
+        tcg_out_bswap16(s, lo, TCG_TMP1);
         break;
     case MO_UW:
-        tcg_out_opc_imm(s, OPC_LHU, datalo, base, 0);
+        tcg_out_opc_imm(s, OPC_LHU, lo, base, 0);
         break;
 
     case MO_SW | MO_BSWAP:
         tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
-        tcg_out_bswap16s(s, datalo, TCG_TMP1);
+        tcg_out_bswap16s(s, lo, TCG_TMP1);
         break;
     case MO_SW:
-        tcg_out_opc_imm(s, OPC_LH, datalo, base, 0);
+        tcg_out_opc_imm(s, OPC_LH, lo, base, 0);
         break;
 
     case MO_UL | MO_BSWAP:
         if (TCG_TARGET_REG_BITS == 64 && is_64) {
-            tcg_out_opc_imm(s, OPC_LWU, TCG_TMP1, base, 0);
-            tcg_out_bswap32u(s, datalo, TCG_TMP1);
+            if (use_mips32r2_instructions) {
+                tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
+                tcg_out_bswap32u(s, lo, lo);
+            } else {
+                tcg_out_bswap_subr(s, bswap32u_addr);
+                /* delay slot */
+                tcg_out_opc_imm(s, OPC_LWU, TCG_REG_A0, base, 0);
+                tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_REG_V0);
+            }
             break;
         }
         /* FALLTHRU */
     case MO_SL | MO_BSWAP:
-        tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 0);
-        tcg_out_bswap32(s, datalo, TCG_TMP1);
+        if (use_mips32r2_instructions) {
+            tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
+            tcg_out_bswap32(s, lo, lo);
+        } else {
+            tcg_out_bswap_subr(s, bswap32s_addr);
+            /* delay slot */
+            tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, base, 0);
+            tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_V0);
+        }
         break;
 
     case MO_UL:
         if (TCG_TARGET_REG_BITS == 64 && is_64) {
-            tcg_out_opc_imm(s, OPC_LWU, datalo, base, 0);
+            tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
             break;
         }
         /* FALLTHRU */
     case MO_SL:
-        tcg_out_opc_imm(s, OPC_LW, datalo, base, 0);
+        tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
         break;
 
     case MO_Q | MO_BSWAP:
-        if (TCG_TARGET_REG_BITS == 32) {
-            tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, HI_OFF);
-            tcg_out_bswap32(s, datalo, TCG_TMP1);
-            tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, LO_OFF);
-            tcg_out_bswap32(s, datahi, TCG_TMP1);
+        if (TCG_TARGET_REG_BITS == 64 && use_mips32r2_instructions) {
+            tcg_out_opc_imm(s, OPC_LD, lo, base, 0);
+            tcg_out_bswap64(s, lo, lo);
+        } else if (TCG_TARGET_REG_BITS == 64) {
+            tcg_out_bswap_subr(s, bswap64_addr);
+            /* delay slot: load the doubleword straight into A0 */
+            tcg_out_opc_imm(s, OPC_LD, TCG_REG_A0, base, 0);
+            tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_REG_V0);
+        } else if (use_mips32r2_instructions) {
+            tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
+            tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 4);
+            tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0);
+            tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1);
+            tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16);
+            tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16);
         } else {
-            tcg_out_opc_imm(s, OPC_LD, TCG_REG_V0, base, 0);
-            tcg_out_bswap64(s, datalo, TCG_REG_V0);
+            tcg_out_bswap_subr(s, bswap32s_addr);
+            /* delay slot: load the first word; it is parked in A2 below */
+            tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, base,
+                            hi_first ? LO_OFF : HI_OFF);
+            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, TCG_REG_V0);
+
+            tcg_out_bswap_subr(s, bswap32s_addr);
+            /* delay slot: load the other word; the halves swap places */
+            tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, base,
+                            hi_first ? HI_OFF : LO_OFF);
+            tcg_out_mov(s, TCG_TYPE_I32, hi_first ? lo : hi, TCG_REG_V0);
+            tcg_out_mov(s, TCG_TYPE_I32, hi_first ? hi : lo, TCG_REG_A2);
         }
         break;
     case MO_Q:
-        if (TCG_TARGET_REG_BITS == 32) {
-            tcg_out_opc_imm(s, OPC_LW, datalo, base, LO_OFF);
-            tcg_out_opc_imm(s, OPC_LW, datahi, base, HI_OFF);
+        if (TCG_TARGET_REG_BITS == 64) {
+            tcg_out_opc_imm(s, OPC_LD, lo, base, 0);
+        } else if (hi_first) {
+            tcg_out_opc_imm(s, OPC_LW, hi, base, HI_OFF);
+            tcg_out_opc_imm(s, OPC_LW, lo, base, LO_OFF);
         } else {
-            tcg_out_opc_imm(s, OPC_LD, datalo, base, 0);
+            tcg_out_opc_imm(s, OPC_LW, lo, base, LO_OFF);
+            tcg_out_opc_imm(s, OPC_LW, hi, base, HI_OFF);
         }
         break;
     default:
@@ -1540,54 +1547,62 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
 #endif
 }
 
-static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
+static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
                                    TCGReg base, TCGMemOp opc)
 {
-    if ((datalo | datahi) == 0) {
+    /* Don't clutter the code below with checks to avoid bswapping ZERO.  */
+    if ((lo | hi) == 0) {
         opc &= ~MO_BSWAP;
     }
 
     switch (opc & (MO_SIZE | MO_BSWAP)) {
     case MO_8:
-        tcg_out_opc_imm(s, OPC_SB, datalo, base, 0);
+        tcg_out_opc_imm(s, OPC_SB, lo, base, 0);
         break;
 
     case MO_16 | MO_BSWAP:
-        tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, datalo, 0xffff);
+        tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, lo, 0xffff);
         tcg_out_bswap16(s, TCG_TMP1, TCG_TMP1);
-        datalo = TCG_TMP1;
+        lo = TCG_TMP1;
         /* FALLTHRU */
     case MO_16:
-        tcg_out_opc_imm(s, OPC_SH, datalo, base, 0);
+        tcg_out_opc_imm(s, OPC_SH, lo, base, 0);
         break;
 
     case MO_32 | MO_BSWAP:
-        tcg_out_bswap32(s, TCG_TMP1, datalo);
-        datalo = TCG_TMP1;
+        tcg_out_bswap32(s, TCG_REG_V0, lo);
+        lo = TCG_REG_V0;
         /* FALLTHRU */
     case MO_32:
-        tcg_out_opc_imm(s, OPC_SW, datalo, base, 0);
+        tcg_out_opc_imm(s, OPC_SW, lo, base, 0);
         break;
 
     case MO_64 | MO_BSWAP:
-        if (TCG_TARGET_REG_BITS == 32) {
-            tcg_out_bswap32(s, TCG_TMP1, datalo);
-            datalo = TCG_TMP1;
-            tcg_out_opc_imm(s, OPC_SW, datalo, base, HI_OFF);
-            tcg_out_bswap32(s, TCG_TMP1, datahi);
-            datahi = TCG_TMP1;
-            tcg_out_opc_imm(s, OPC_SW, datahi, base, LO_OFF);
+        if (TCG_TARGET_REG_BITS == 64) {
+            tcg_out_bswap64(s, TCG_REG_V0, lo);
+            lo = TCG_REG_V0;
+        } else if (use_mips32r2_instructions) {
+            tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? lo : hi);
+            tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? hi : lo);
+            tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16);
+            tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16);
+            tcg_out_opc_imm(s, OPC_SW, TCG_TMP0, base, 0);
+            tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, 4);
+            break;
+        } else {
+            tcg_out_bswap32(s, TCG_REG_V0, lo);
+            tcg_out_opc_imm(s, OPC_SW, TCG_REG_V0, base, HI_OFF);
+            tcg_out_bswap32(s, TCG_REG_V0, hi);
+            tcg_out_opc_imm(s, OPC_SW, TCG_REG_V0, base, LO_OFF);
             break;
         }
-        tcg_out_bswap64(s, TCG_REG_A1, datalo);
-        datalo = TCG_REG_A1;
         /* FALLTHRU */
     case MO_64:
         if (TCG_TARGET_REG_BITS == 32) {
-            tcg_out_opc_imm(s, OPC_SW, datalo, base, LO_OFF);
-            tcg_out_opc_imm(s, OPC_SW, datahi, base, HI_OFF);
+            tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? hi : lo, base, 0);
+            tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? lo : hi, base, 4);
         } else {
-            tcg_out_opc_imm(s, OPC_SD, datalo, base, 0);
+            tcg_out_opc_imm(s, OPC_SD, lo, base, 0);
         }
         break;
 
@@ -2117,7 +2132,7 @@ static const TCGTargetOpDef mips_op_defs[] = {
     { INDEX_op_rotl_i32, { "r", "rZ", "ri" } },
 
     { INDEX_op_bswap16_i32, { "r", "r" } },
-    { INDEX_op_bswap32_i32, { "r", "r" } },
+    { INDEX_op_bswap32_i32, { "v", "a" } },
 
     { INDEX_op_ext8s_i32, { "r", "rZ" } },
     { INDEX_op_ext16s_i32, { "r", "rZ" } },
@@ -2179,8 +2194,8 @@ static const TCGTargetOpDef mips_op_defs[] = {
     { INDEX_op_rotl_i64, { "r", "rZ", "ri" } },
 
     { INDEX_op_bswap16_i64, { "r", "r" } },
-    { INDEX_op_bswap32_i64, { "r", "r" } },
-    { INDEX_op_bswap64_i64, { "r", "r" } },
+    { INDEX_op_bswap32_i64, { "v", "a" } },
+    { INDEX_op_bswap64_i64, { "v", "a" } },
 
     { INDEX_op_ext8s_i64, { "r", "rZ" } },
     { INDEX_op_ext16s_i64, { "r", "rZ" } },
@@ -2324,6 +2339,16 @@ static void tcg_target_detect_isa(void)
 /* We're expecting to be able to use an immediate for frame allocation.  */
 QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7fff);
 
+/* Bump s->code_ptr up to a 16-byte boundary, if needed, and return it.  */
+static tcg_insn_unit *align_code_ptr(TCGContext *s)
+{
+    uintptr_t misalign = (uintptr_t)s->code_ptr & 15;
+    if (misalign) {
+        s->code_ptr = (void *)((uintptr_t)s->code_ptr + (16 - misalign));
+    }
+    return s->code_ptr;
+}
+
 /* Generate global QEMU prologue and epilogue code */
 static void tcg_target_qemu_prologue(TCGContext *s)
 {
@@ -2353,6 +2378,128 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
     /* delay slot */
     tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
+
+    if (use_mips32r2_instructions) {
+        return;
+    }
+
+    /* Bswap subroutines: Input in TCG_REG_A0, output in TCG_REG_V0;
+       clobbers TCG_TMP1, TCG_TMP0.  */
+
+    bswap32s_addr = align_code_ptr(s);
+
+    /*
+     * bswap32s -- signed 32-bit swap.  a0 = abcd.
+     */
+    /* v0 = (ssss)d000 */
+    tcg_out_opc_sa(s, OPC_SLL, TCG_REG_V0, TCG_REG_A0, 24);
+    /* t1 = 000a */
+    tcg_out_opc_sa(s, OPC_SRL, TCG_TMP1, TCG_REG_A0, 24);
+    /* t0 = 00c0 */
+    tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_REG_A0, 0xff00);
+    /* v0 = d00a */
+    tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1);
+    /* t1 = 0abc */
+    tcg_out_opc_sa(s, OPC_SRL, TCG_TMP1, TCG_REG_A0, 8);
+    /* t0 = 0c00 */
+    tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8);
+    /* t1 = 00b0 */
+    tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+    /* v0 = dc0a */
+    tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP0);
+    tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
+    /* v0 = dcba -- delay slot */
+    tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1);
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        return;
+    }
+
+    bswap32u_addr = align_code_ptr(s);
+
+    /*
+     * bswap32u -- unsigned 32-bit swap.  a0 = ....abcd.
+     */
+    /* t1 = (0000)000d */
+    tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_REG_A0, 0xff);
+    /* v0 = 000a */
+    tcg_out_opc_sa(s, OPC_SRL, TCG_REG_V0, TCG_REG_A0, 24);
+    /* t1 = (0000)d000 */
+    tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 24);
+    /* t0 = 00c0 */
+    tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_REG_A0, 0xff00);
+    /* v0 = d00a */
+    tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1);
+    /* t1 = 0abc */
+    tcg_out_opc_sa(s, OPC_SRL, TCG_TMP1, TCG_REG_A0, 8);
+    /* t0 = 0c00 */
+    tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8);
+    /* t1 = 00b0 */
+    tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+    /* v0 = dc0a */
+    tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP0);
+    tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
+    /* v0 = dcba -- delay slot */
+    tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1);
+
+    bswap64_addr = align_code_ptr(s);
+
+    /*
+     * bswap64 -- 64-bit swap.  a0 = abcdefgh
+     */
+    /* v0 = h0000000 */
+    tcg_out_dsll(s, TCG_REG_V0, TCG_REG_A0, 56);
+    /* t1 = 0000000a */
+    tcg_out_dsrl(s, TCG_TMP1, TCG_REG_A0, 56);
+
+    /* t0 = 000000g0 */
+    tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_REG_A0, 0xff00);
+    /* v0 = h000000a */
+    tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1);
+    /* t1 = 00000abc */
+    tcg_out_dsrl(s, TCG_TMP1, TCG_REG_A0, 40);
+    /* t0 = 0g000000 */
+    tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 40);
+    /* t1 = 000000b0 */
+    tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+
+    /* v0 = hg00000a */
+    tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP0);
+    /* t0 = 0000abcd */
+    tcg_out_dsrl(s, TCG_TMP0, TCG_REG_A0, 32);
+    /* v0 = hg0000ba */
+    tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1);
+
+    /* t1 = 000000c0 */
+    tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP0, 0xff00);
+    /* t0 = 0000000d */
+    tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0x00ff);
+    /* t1 = 00000c00 */
+    tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 8);
+    /* t0 = 0000d000 */
+    tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 24);
+
+    /* v0 = hg000cba */
+    tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1);
+    /* t1 = 00abcdef */
+    tcg_out_dsrl(s, TCG_TMP1, TCG_REG_A0, 16);
+    /* v0 = hg00dcba */
+    tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP0);
+
+    /* t0 = 0000000f */
+    tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP1, 0x00ff);
+    /* t1 = 000000e0 */
+    tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+    /* t0 = 00f00000 */
+    tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 40);
+    /* t1 = 000e0000 */
+    tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 24);
+
+    /* v0 = hgf0dcba */
+    tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP0);
+    tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
+    /* v0 = hgfedcba -- delay slot */
+    tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1);
 }
 
 static void tcg_target_init(TCGContext *s)
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 0dab62b..374d803 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -128,6 +128,7 @@ extern bool use_mips32r2_instructions;
 #define TCG_TARGET_HAS_muls2_i32        (!use_mips32r6_instructions)
 #define TCG_TARGET_HAS_muluh_i32        1
 #define TCG_TARGET_HAS_mulsh_i32        1
+#define TCG_TARGET_HAS_bswap32_i32      1
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_add2_i32         0
@@ -150,12 +151,13 @@ extern bool use_mips32r2_instructions;
 #define TCG_TARGET_HAS_mulsh_i64        1
 #define TCG_TARGET_HAS_ext32s_i64       1
 #define TCG_TARGET_HAS_ext32u_i64       1
+#define TCG_TARGET_HAS_bswap32_i64      1
+#define TCG_TARGET_HAS_bswap64_i64      1
 #endif
 
 /* optional instructions detected at runtime */
 #define TCG_TARGET_HAS_movcond_i32      use_movnz_instructions
 #define TCG_TARGET_HAS_bswap16_i32      use_mips32r2_instructions
-#define TCG_TARGET_HAS_bswap32_i32      use_mips32r2_instructions
 #define TCG_TARGET_HAS_deposit_i32      use_mips32r2_instructions
 #define TCG_TARGET_HAS_ext8s_i32        use_mips32r2_instructions
 #define TCG_TARGET_HAS_ext16s_i32       use_mips32r2_instructions
@@ -164,8 +166,6 @@ extern bool use_mips32r2_instructions;
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_movcond_i64      use_movnz_instructions
 #define TCG_TARGET_HAS_bswap16_i64      use_mips32r2_instructions
-#define TCG_TARGET_HAS_bswap32_i64      use_mips32r2_instructions
-#define TCG_TARGET_HAS_bswap64_i64      use_mips32r2_instructions
 #define TCG_TARGET_HAS_deposit_i64      use_mips32r2_instructions
 #define TCG_TARGET_HAS_ext8s_i64        use_mips32r2_instructions
 #define TCG_TARGET_HAS_ext16s_i64       use_mips32r2_instructions
-- 
2.5.0

  parent reply	other threads:[~2016-02-09 10:40 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-09 10:39 [Qemu-devel] [PATCH 00/15] tcg mips64 and mipsr6 improvements Richard Henderson
2016-02-09 10:39 ` [Qemu-devel] [PATCH 01/15] tcg-mips: Add mips64 opcodes Richard Henderson
2016-02-09 10:39 ` [Qemu-devel] [PATCH 02/15] tcg-mips: Support 64-bit opcodes Richard Henderson
2016-02-09 15:24   ` James Hogan
2016-02-09 17:16     ` Richard Henderson
2016-02-09 10:39 ` [Qemu-devel] [PATCH 03/15] tcg-mips: Adjust move functions for mips64 Richard Henderson
2016-02-09 10:39 ` [Qemu-devel] [PATCH 04/15] tcg-mips: Adjust load/store " Richard Henderson
2016-02-09 10:39 ` [Qemu-devel] [PATCH 05/15] tcg-mips: Adjust prologue " Richard Henderson
2016-02-09 10:39 ` [Qemu-devel] [PATCH 06/15] tcg-mips: Add tcg unwind info Richard Henderson
2016-02-09 10:39 ` [Qemu-devel] [PATCH 07/15] tcg-mips: Adjust qemu_ld/st for mips64 Richard Henderson
2016-02-10 16:34   ` James Hogan
2016-02-10 17:35     ` Richard Henderson
2016-02-09 10:39 ` [Qemu-devel] [PATCH 08/15] tcg-mips: Adjust calling conventions " Richard Henderson
2016-02-09 10:39 ` [Qemu-devel] [PATCH 09/15] tcg-mips: Fix exit_tb " Richard Henderson
2016-02-09 10:39 ` Richard Henderson [this message]
2016-02-09 10:39 ` [Qemu-devel] [PATCH 11/15] tcg-mips: Use mips64r6 instructions in tcg_out_movi Richard Henderson
2016-02-09 16:50   ` James Hogan
2016-02-09 17:20     ` Richard Henderson
2016-02-09 17:25     ` Richard Henderson
2016-02-10  0:32     ` James Hogan
2016-02-09 10:40 ` [Qemu-devel] [PATCH 12/15] tcg-mips: Use mips64r6 instructions in tcg_out_ldst Richard Henderson
2016-02-09 10:40 ` [Qemu-devel] [PATCH 13/15] tcg-mips: Use mips64r6 instructions in constant addition Richard Henderson
2016-02-09 10:40 ` [Qemu-devel] [PATCH 14/15] tcg-mips: Use mipsr6 instructions in branches Richard Henderson
2016-02-09 16:22   ` James Hogan
2016-02-09 17:13     ` Richard Henderson
2016-02-09 18:46       ` Maciej W. Rozycki
2016-02-10  0:20     ` James Hogan
2016-02-09 10:40 ` [Qemu-devel] [PATCH 15/15] tcg-mips: Use mipsr6 instructions in calls Richard Henderson
2016-02-10 12:49   ` James Hogan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1455014403-10742-11-git-send-email-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=aurelien@aurel32.net \
    --cc=james.hogan@imgtec.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.