From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:46303) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1aT5is-0006ed-DD for qemu-devel@nongnu.org; Tue, 09 Feb 2016 05:40:45 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1aT5ip-0005RZ-Q0 for qemu-devel@nongnu.org; Tue, 09 Feb 2016 05:40:42 -0500 Received: from mail-pf0-x243.google.com ([2607:f8b0:400e:c00::243]:34104) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1aT5ip-0005R9-6K for qemu-devel@nongnu.org; Tue, 09 Feb 2016 05:40:39 -0500 Received: by mail-pf0-x243.google.com with SMTP id 71so4789102pfv.1 for ; Tue, 09 Feb 2016 02:40:39 -0800 (PST) Sender: Richard Henderson From: Richard Henderson Date: Tue, 9 Feb 2016 21:39:58 +1100 Message-Id: <1455014403-10742-11-git-send-email-rth@twiddle.net> In-Reply-To: <1455014403-10742-1-git-send-email-rth@twiddle.net> References: <1455014403-10742-1-git-send-email-rth@twiddle.net> Subject: [Qemu-devel] [PATCH 10/15] tcg-mips: Move bswap code to subroutines List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: james.hogan@imgtec.com, aurelien@aurel32.net Without the mips32r2 / mips64r2 instructions to perform swapping, the inline 32-bit and 64-bit bswap sequences are quite large. Move them to subroutines in the prologue block to minimize code bloat. 
Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c | 389 ++++++++++++++++++++++++++++++++++---------------- tcg/mips/tcg-target.h | 6 +- 2 files changed, 271 insertions(+), 124 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index b8c5d90..97f9251 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -135,6 +135,9 @@ static const TCGReg tcg_target_call_oarg_regs[2] = { }; static tcg_insn_unit *tb_ret_addr; +static tcg_insn_unit *bswap32s_addr; +static tcg_insn_unit *bswap32u_addr; +static tcg_insn_unit *bswap64_addr; static inline uint32_t reloc_pc16_val(tcg_insn_unit *pc, tcg_insn_unit *target) { @@ -187,6 +190,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) ct_str = *pct_str; switch(ct_str[0]) { case 'r': + do_default: ct->ct |= TCG_CT_REG; tcg_regset_set(ct->u.regs, 0xffffffff); break; @@ -208,6 +212,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) case 'S': /* qemu_st constraint */ ct->ct |= TCG_CT_REG; tcg_regset_set(ct->u.regs, 0xffffffff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_V0); tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); #if defined(CONFIG_SOFTMMU) if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { @@ -218,6 +223,22 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) } #endif break; + case 'v': /* bswap output constraint */ + if (use_mips32r2_instructions) { + goto do_default; + } + ct->ct |= TCG_CT_REG; + tcg_regset_clear(ct->u.regs); + tcg_regset_set_reg(ct->u.regs, TCG_REG_V0); + break; + case 'a': /* bswap input constraint */ + if (use_mips32r2_instructions) { + goto do_default; + } + ct->ct |= TCG_CT_REG; + tcg_regset_clear(ct->u.regs); + tcg_regset_set_reg(ct->u.regs, TCG_REG_A0); + break; case 'I': ct->ct |= TCG_CT_CONST_U16; break; @@ -618,29 +639,23 @@ static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg) } } +static void tcg_out_bswap_subr(TCGContext *s, tcg_insn_unit *sub) +{ + 
if (!tcg_out_opc_jmp(s, OPC_JAL, sub)) { + tcg_abort(); + } +} + static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg) { if (use_mips32r2_instructions) { tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16); } else { - /* ret and arg must be different and can't be register at */ - if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0) { - tcg_abort(); - } - - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); - - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 24); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00); - tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0xff00); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); + assert(ret == TCG_REG_V0); + tcg_out_bswap_subr(s, bswap32s_addr); + /* delay slot */ + tcg_out_opc_reg(s, OPC_OR, TCG_REG_A0, arg, TCG_REG_ZERO); } } @@ -648,26 +663,13 @@ static inline void tcg_out_bswap32u(TCGContext *s, TCGReg ret, TCGReg arg) { if (use_mips32r2_instructions) { tcg_out_opc_reg(s, OPC_DSBH, ret, 0, arg); - tcg_out_opc_reg(s, OPC_DSHD, ret, 0, arg); + tcg_out_opc_reg(s, OPC_DSHD, ret, 0, ret); tcg_out_dsrl(s, ret, ret, 32); } else { - /* ret and arg must be different and can't be register at */ - if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0) { - tcg_abort(); - } - - tcg_out_dsll(s, ret, arg, 24); - - tcg_out_dsrl(s, TCG_TMP0, arg, 24); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00); - tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 8); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - - tcg_out_dsrl(s, TCG_TMP0, arg, 8); - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0xff00); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); + assert(ret == TCG_REG_V0); + tcg_out_bswap_subr(s, bswap32u_addr); + /* delay slot */ + 
tcg_out_opc_reg(s, OPC_OR, TCG_REG_A0, arg, TCG_REG_ZERO); } } @@ -677,44 +679,10 @@ static void tcg_out_bswap64(TCGContext *s, TCGReg ret, TCGReg arg) tcg_out_opc_reg(s, OPC_DSBH, ret, 0, arg); tcg_out_opc_reg(s, OPC_DSHD, ret, 0, arg); } else { - /* ret and arg must be different and can't be either tmp reg. */ - if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0 - || ret == TCG_TMP1 || arg == TCG_TMP1) { - tcg_abort(); - } - - /* ??? Consider just making this a subroutine. */ - - /* A... ...H -> H... ...A */ - tcg_out_dsll(s, ret, arg, 56); - tcg_out_dsrl(s, TCG_TMP0, arg, 56); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - - /* .B.. ..G. -> .G.. ..B. */ - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00); - tcg_out_dsrl(s, TCG_TMP1, arg, 40); - tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 40); - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1); - - /* ..CD .... -> .... DC.. */ - tcg_out_dsrl(s, TCG_TMP0, arg, 32); - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP0, 0xff00); - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0x00ff); - tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 8); - tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 24); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - - /* .... EF.. -> ..FE .... 
*/ - tcg_out_dsrl(s, TCG_TMP0, arg, 16); - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP0, 0xff00); - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0x00ff); - tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 24); - tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 40); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); + assert(ret == TCG_REG_V0); + tcg_out_bswap_subr(s, bswap64_addr); + /* delay slot */ + tcg_out_opc_reg(s, OPC_OR, TCG_REG_A0, arg, TCG_REG_ZERO); } } @@ -1425,72 +1393,111 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } #endif -static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, TCGMemOp opc, bool is_64) { + bool hi_first = MIPS_BE ? hi != base : lo == base; + switch (opc & (MO_SSIZE | MO_BSWAP)) { case MO_UB: - tcg_out_opc_imm(s, OPC_LBU, datalo, base, 0); + tcg_out_opc_imm(s, OPC_LBU, lo, base, 0); break; case MO_SB: - tcg_out_opc_imm(s, OPC_LB, datalo, base, 0); + tcg_out_opc_imm(s, OPC_LB, lo, base, 0); break; case MO_UW | MO_BSWAP: tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); - tcg_out_bswap16(s, datalo, TCG_TMP1); + tcg_out_bswap16(s, lo, TCG_TMP1); break; case MO_UW: - tcg_out_opc_imm(s, OPC_LHU, datalo, base, 0); + tcg_out_opc_imm(s, OPC_LHU, lo, base, 0); break; case MO_SW | MO_BSWAP: tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); - tcg_out_bswap16s(s, datalo, TCG_TMP1); + tcg_out_bswap16s(s, lo, TCG_TMP1); break; case MO_SW: - tcg_out_opc_imm(s, OPC_LH, datalo, base, 0); + tcg_out_opc_imm(s, OPC_LH, lo, base, 0); break; case MO_UL | MO_BSWAP: if (TCG_TARGET_REG_BITS == 64 && is_64) { - tcg_out_opc_imm(s, OPC_LWU, TCG_TMP1, base, 0); - tcg_out_bswap32u(s, datalo, TCG_TMP1); + if (use_mips32r2_instructions) { + tcg_out_opc_imm(s, OPC_LWU, lo, base, 0); + tcg_out_bswap32u(s, lo, lo); + } else { + tcg_out_bswap_subr(s, bswap32u_addr); + /* delay slot */ + 
tcg_out_opc_imm(s, OPC_LWU, TCG_REG_A0, base, 0); + tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_REG_V0); + } break; } /* FALLTHRU */ case MO_SL | MO_BSWAP: - tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 0); - tcg_out_bswap32(s, datalo, TCG_TMP1); + if (use_mips32r2_instructions) { + tcg_out_opc_imm(s, OPC_LW, lo, base, 0); + tcg_out_bswap32(s, lo, lo); + } else { + tcg_out_bswap_subr(s, bswap32s_addr); + /* delay slot */ + tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, base, 0); + tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_V0); + } break; case MO_UL: if (TCG_TARGET_REG_BITS == 64 && is_64) { - tcg_out_opc_imm(s, OPC_LWU, datalo, base, 0); + tcg_out_opc_imm(s, OPC_LWU, lo, base, 0); break; } /* FALLTHRU */ case MO_SL: - tcg_out_opc_imm(s, OPC_LW, datalo, base, 0); + tcg_out_opc_imm(s, OPC_LW, lo, base, 0); break; case MO_Q | MO_BSWAP: - if (TCG_TARGET_REG_BITS == 32) { - tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, HI_OFF); - tcg_out_bswap32(s, datalo, TCG_TMP1); - tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, LO_OFF); - tcg_out_bswap32(s, datahi, TCG_TMP1); + if (TCG_TARGET_REG_BITS == 64 && use_mips32r2_instructions) { + tcg_out_opc_imm(s, OPC_LD, lo, base, 0); + tcg_out_bswap64(s, lo, lo); + } else if (TCG_TARGET_REG_BITS == 64) { + tcg_out_bswap_subr(s, bswap64_addr); + /* delay slot */ + tcg_out_opc_imm(s, OPC_LD, TCG_REG_A0, base, 0); + tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_REG_V0); + } else if (use_mips32r2_instructions) { + tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0); + tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 4); + tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0); + tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1); + tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16); + tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16); } else { - tcg_out_opc_imm(s, OPC_LD, TCG_REG_V0, base, 0); - tcg_out_bswap64(s, datalo, TCG_REG_V0); + tcg_out_bswap_subr(s, bswap32s_addr); + /* delay slot */ + tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, base, + hi_first ? 
LO_OFF : HI_OFF); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, TCG_REG_V0); + + tcg_out_bswap_subr(s, bswap32s_addr); + /* delay slot */ + tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, base, + hi_first ? LO_OFF : HI_OFF); + tcg_out_mov(s, TCG_TYPE_I32, hi_first ? lo : hi, TCG_REG_V0); + tcg_out_mov(s, TCG_TYPE_I32, hi_first ? hi : lo, TCG_REG_A2); } break; case MO_Q: - if (TCG_TARGET_REG_BITS == 32) { - tcg_out_opc_imm(s, OPC_LW, datalo, base, LO_OFF); - tcg_out_opc_imm(s, OPC_LW, datahi, base, HI_OFF); + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_opc_imm(s, OPC_LD, lo, base, 0); + } else if (hi_first) { + tcg_out_opc_imm(s, OPC_LW, hi, base, HI_OFF); + tcg_out_opc_imm(s, OPC_LW, lo, base, LO_OFF); } else { - tcg_out_opc_imm(s, OPC_LD, datalo, base, 0); + tcg_out_opc_imm(s, OPC_LW, lo, base, LO_OFF); + tcg_out_opc_imm(s, OPC_LW, hi, base, HI_OFF); } break; default: @@ -1540,54 +1547,62 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) #endif } -static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, +static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, TCGMemOp opc) { - if ((datalo | datahi) == 0) { + /* Don't clutter the code below with checks to avoid bswapping ZERO. 
*/ + if ((lo | hi) == 0) { opc &= ~MO_BSWAP; } switch (opc & (MO_SIZE | MO_BSWAP)) { case MO_8: - tcg_out_opc_imm(s, OPC_SB, datalo, base, 0); + tcg_out_opc_imm(s, OPC_SB, lo, base, 0); break; case MO_16 | MO_BSWAP: - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, datalo, 0xffff); + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, lo, 0xffff); tcg_out_bswap16(s, TCG_TMP1, TCG_TMP1); - datalo = TCG_TMP1; + lo = TCG_TMP1; /* FALLTHRU */ case MO_16: - tcg_out_opc_imm(s, OPC_SH, datalo, base, 0); + tcg_out_opc_imm(s, OPC_SH, lo, base, 0); break; case MO_32 | MO_BSWAP: - tcg_out_bswap32(s, TCG_TMP1, datalo); - datalo = TCG_TMP1; + tcg_out_bswap32(s, TCG_REG_V0, lo); + lo = TCG_REG_V0; /* FALLTHRU */ case MO_32: - tcg_out_opc_imm(s, OPC_SW, datalo, base, 0); + tcg_out_opc_imm(s, OPC_SW, lo, base, 0); break; case MO_64 | MO_BSWAP: - if (TCG_TARGET_REG_BITS == 32) { - tcg_out_bswap32(s, TCG_TMP1, datalo); - datalo = TCG_TMP1; - tcg_out_opc_imm(s, OPC_SW, datalo, base, HI_OFF); - tcg_out_bswap32(s, TCG_TMP1, datahi); - datahi = TCG_TMP1; - tcg_out_opc_imm(s, OPC_SW, datahi, base, LO_OFF); + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_bswap64(s, TCG_REG_V0, lo); + lo = TCG_REG_V0; + } else if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? lo : hi); + tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? 
hi : lo); + tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16); + tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16); + tcg_out_opc_imm(s, OPC_SW, TCG_TMP0, base, 0); + tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, 4); + break; + } else { + tcg_out_bswap32(s, TCG_REG_V0, lo); + tcg_out_opc_imm(s, OPC_SW, TCG_REG_V0, base, HI_OFF); + tcg_out_bswap32(s, TCG_REG_V0, hi); + tcg_out_opc_imm(s, OPC_SW, TCG_REG_V0, base, LO_OFF); break; } - tcg_out_bswap64(s, TCG_REG_A1, datalo); - datalo = TCG_REG_A1; /* FALLTHRU */ case MO_64: if (TCG_TARGET_REG_BITS == 32) { - tcg_out_opc_imm(s, OPC_SW, datalo, base, LO_OFF); - tcg_out_opc_imm(s, OPC_SW, datahi, base, HI_OFF); + tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? hi : lo, base, 0); + tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? lo : hi, base, 4); } else { - tcg_out_opc_imm(s, OPC_SD, datalo, base, 0); + tcg_out_opc_imm(s, OPC_SD, lo, base, 0); } break; @@ -2117,7 +2132,7 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_rotl_i32, { "r", "rZ", "ri" } }, { INDEX_op_bswap16_i32, { "r", "r" } }, - { INDEX_op_bswap32_i32, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "v", "a" } }, { INDEX_op_ext8s_i32, { "r", "rZ" } }, { INDEX_op_ext16s_i32, { "r", "rZ" } }, @@ -2179,8 +2194,8 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_rotl_i64, { "r", "rZ", "ri" } }, { INDEX_op_bswap16_i64, { "r", "r" } }, - { INDEX_op_bswap32_i64, { "r", "r" } }, - { INDEX_op_bswap64_i64, { "r", "r" } }, + { INDEX_op_bswap32_i64, { "v", "a" } }, + { INDEX_op_bswap64_i64, { "v", "a" } }, { INDEX_op_ext8s_i64, { "r", "rZ" } }, { INDEX_op_ext16s_i64, { "r", "rZ" } }, @@ -2324,6 +2339,16 @@ static void tcg_target_detect_isa(void) /* We're expecting to be able to use an immediate for frame allocation. 
*/ QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7fff); +static tcg_insn_unit *align_code_ptr(TCGContext *s) +{ + uintptr_t p = (uintptr_t)s->code_ptr; + if (p & 15) { + p = (p + 15) & -16; + s->code_ptr = (void *)p; + } + return s->code_ptr; +} + /* Generate global QEMU prologue and epilogue code */ static void tcg_target_qemu_prologue(TCGContext *s) { @@ -2353,6 +2378,128 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0); /* delay slot */ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE); + + if (use_mips32r2_instructions) { + return; + } + + /* Bswap subroutines: Input in TCG_REG_A0, output in TCG_REG_V0; + clobbers TCG_TMP1, TCG_TMP0. */ + + bswap32s_addr = align_code_ptr(s); + + /* + * bswap32s -- signed 32-bit swap. a0 = abcd. + */ + /* v0 = (ssss)d000 */ + tcg_out_opc_sa(s, OPC_SLL, TCG_REG_V0, TCG_REG_A0, 24); + /* t1 = 000a */ + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP1, TCG_REG_A0, 24); + /* t0 = 00c0 */ + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_REG_A0, 0xff00); + /* v0 = d00a */ + tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1); + /* t1 = 0abc */ + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP1, TCG_REG_A0, 8); + /* t0 = 0c00 */ + tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8); + /* t1 = 00b0 */ + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00); + /* v0 = dc0a */ + tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP0); + tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0); + /* v0 = dcba -- delay slot */ + tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1); + + if (TCG_TARGET_REG_BITS == 32) { + return; + } + + bswap32u_addr = align_code_ptr(s); + + /* + * bswap32u -- unsigned 32-bit swap. a0 = ....abcd. 
+ */ + /* t1 = (0000)000d */ + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_REG_A0, 0xff); + /* v0 = 000a */ + tcg_out_opc_sa(s, OPC_SRL, TCG_REG_V0, TCG_REG_A0, 24); + /* t1 = (0000)d000 */ + tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 24); + /* t0 = 00c0 */ + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_REG_A0, 0xff00); + /* v0 = d00a */ + tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1); + /* t1 = 0abc */ + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP1, TCG_REG_A0, 8); + /* t0 = 0c00 */ + tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8); + /* t1 = 00b0 */ + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00); + /* v0 = dc0a */ + tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP0); + tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0); + /* v0 = dcba -- delay slot */ + tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1); + + bswap64_addr = align_code_ptr(s); + + /* + * bswap64 -- 64-bit swap. a0 = abcdefgh + */ + /* v0 = h0000000 */ + tcg_out_dsll(s, TCG_REG_V0, TCG_REG_A0, 56); + /* t1 = 0000000a */ + tcg_out_dsrl(s, TCG_TMP1, TCG_REG_A0, 56); + + /* t0 = 000000g0 */ + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_REG_A0, 0xff00); + /* v0 = h000000a */ + tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1); + /* t1 = 00000abc */ + tcg_out_dsrl(s, TCG_TMP1, TCG_REG_A0, 40); + /* t0 = 0g000000 */ + tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 40); + /* t1 = 000000b0 */ + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00); + + /* v0 = hg00000a */ + tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP0); + /* t0 = 0000abcd */ + tcg_out_dsrl(s, TCG_TMP0, TCG_REG_A0, 32); + /* v0 = hg0000ba */ + tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1); + + /* t1 = 000000c0 */ + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP0, 0xff00); + /* t0 = 0000000d */ + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0x00ff); + /* t1 = 00000c00 */ + tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 8); + /* t0 = 0000d000 */ + tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 24); + + /* 
v0 = hg000cba */ + tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1); + /* t1 = 00abcdef */ + tcg_out_dsrl(s, TCG_TMP1, TCG_REG_A0, 16); + /* v0 = hg00dcba */ + tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP0); + + /* t0 = 0000000f */ + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP1, 0x00ff); + /* t1 = 000000e0 */ + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00); + /* t0 = 00f00000 */ + tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 40); + /* t1 = 000e0000 */ + tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 24); + + /* v0 = hgf0dcba */ + tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP0); + tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0); + /* v0 = hgfedcba -- delay slot */ + tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1); } static void tcg_target_init(TCGContext *s) diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 0dab62b..374d803 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -128,6 +128,7 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 +#define TCG_TARGET_HAS_bswap32_i32 1 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 @@ -150,12 +151,13 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_mulsh_i64 1 #define TCG_TARGET_HAS_ext32s_i64 1 #define TCG_TARGET_HAS_ext32u_i64 1 +#define TCG_TARGET_HAS_bswap32_i64 1 +#define TCG_TARGET_HAS_bswap64_i64 1 #endif /* optional instructions detected at runtime */ #define TCG_TARGET_HAS_movcond_i32 use_movnz_instructions #define TCG_TARGET_HAS_bswap16_i32 use_mips32r2_instructions -#define TCG_TARGET_HAS_bswap32_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions @@ -164,8 +166,6 @@ extern bool use_mips32r2_instructions; #if TCG_TARGET_REG_BITS == 64 
#define TCG_TARGET_HAS_movcond_i64 use_movnz_instructions #define TCG_TARGET_HAS_bswap16_i64 use_mips32r2_instructions -#define TCG_TARGET_HAS_bswap32_i64 use_mips32r2_instructions -#define TCG_TARGET_HAS_bswap64_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions #define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions -- 2.5.0