diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index bb19a95..ec49b34 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -258,7 +258,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_JMP_long (0xe9)
 #define OPC_JMP_short (0xeb)
 #define OPC_LEA (0x8d)
-#define OPC_MOVB_EvGv (0x88) /* stores, more or less */
+#define OPC_MOVB_EbGb (0x88) /* stores, more or less */
+#define OPC_MOVB_GbEb (0x8a) /* loads, more or less */
 #define OPC_MOVL_EvGv (0x89) /* stores, more or less */
 #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
 #define OPC_MOVL_EvIz (0xc7)
@@ -277,6 +278,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_SHIFT_1 (0xd1)
 #define OPC_SHIFT_Ib (0xc1)
 #define OPC_SHIFT_cl (0xd3)
+#define OPC_SHRD_Ib (0xac | P_EXT)
 #define OPC_TESTL (0x85)
 #define OPC_XCHG_ax_r32 (0x90)
 
@@ -710,6 +712,126 @@ static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
     }
 }
 
+/* Deposit the low LEN bits of register VAL into register INOUT at bit
+   offset OFS, leaving the remaining bits of INOUT untouched.  REXW is
+   non-zero for a 64-bit operation and zero for a 32-bit one.  OFS + LEN
+   is assumed not to exceed the operation width.  */
+static void tcg_out_deposit(TCGContext *s, int inout, int val,
+                            unsigned ofs, unsigned len, int rexw)
+{
+    TCGType type = rexw ? TCG_TYPE_I64 : TCG_TYPE_I32;
+    tcg_target_ulong imask, vmask;
+    TCGRegSet live;
+    int scratch;
+
+    /* Look for MOVB w/ %reg_h special case.  */
+    if (ofs == 8 && len == 8 && inout < 4 && val < 4) {
+        tcg_out_modrm(s, OPC_MOVB_GbEb, inout + 4, val);
+        return;
+    }
+
+    /* Look for MOVB/MOVW special cases.  On a 32-bit host only registers
+       0-3 have an addressable low byte.  A deposit from self at a non-zero
+       offset needs a temporary, and nothing here constrains which register
+       tcg_scratch_alloc hands back, so that combination is left to the
+       generic mask sequence below.  */
+    if (len == 16
+        || (len == 8
+            && (TCG_TARGET_REG_BITS == 64
+                || (inout < 4 && val < 4 && (ofs == 0 || inout != val))))) {
+        /* If the offset is non-zero, and we have a deposit from self,
+           then we need a temporary.  */
+        if (ofs != 0 && inout == val) {
+            tcg_regset_clear(live);
+            tcg_regset_set_reg(live, inout);
+            val = tcg_scratch_alloc(s, type, live);
+            tcg_out_mov(s, type, val, inout);
+        }
+
+        /* If the offset is non-zero, rotate the destination into place.  */
+        if (ofs != 0) {
+            tcg_out_shifti(s, SHIFT_ROR + rexw, inout, ofs);
+        }
+
+        if (len == 8) {
+            tcg_out_modrm(s, OPC_MOVB_GbEb + P_REXB_R + P_REXB_RM, inout, val);
+        } else {
+            tcg_out_modrm(s, OPC_MOVL_GvEv + P_DATA16, inout, val);
+        }
+
+        /* Restore the destination to its proper location.  */
+        if (ofs != 0) {
+            tcg_out_shifti(s, SHIFT_ROL + rexw, inout, ofs);
+        }
+        return;
+    }
+
+    /* Otherwise we can't support this operation natively.  It's possible to
+       play tricks with rotates and shld in order to implement this.  While
+       that is much smaller than masks, it turns out that shld is too slow
+       on many cpus.  */
+    tcg_regset_clear(live);
+    tcg_regset_set_reg(live, inout);
+    tcg_regset_set_reg(live, val);
+    scratch = tcg_scratch_alloc(s, type, live);
+
+    /* Build the masks without shifting by the full width of the type,
+       which would be undefined when LEN covers the whole register.  */
+    if (len >= TCG_TARGET_REG_BITS) {
+        vmask = ~(tcg_target_ulong)0;
+    } else {
+        vmask = ((tcg_target_ulong)1 << len) - 1;
+    }
+    imask = ~(vmask << ofs);
+
+    /* Careful, some 64-bit masks cannot use immediate operands.  IMASK can
+       fit in a sign-extended 32-bit immediate even though VMASK is wider
+       than 32 bits (a field that runs from bit 31 or below up to bit 63),
+       so both masks have to be checked.  */
+    if (type == TCG_TYPE_I64
+        && (imask != (int32_t)imask || vmask > 0xffffffffu)) {
+        bool val_scratch = false;
+
+        /* Since we are going to clobber INOUT first, the destination
+           bitfield cannot overlap the input bits.  */
+        if (inout == val && ofs < len) {
+            tcg_regset_set_reg(live, scratch);
+            val = tcg_scratch_alloc(s, type, live);
+            tcg_out_mov(s, type, val, inout);
+            val_scratch = true;
+        }
+
+        tcg_out_movi(s, type, scratch, imask);
+        tgen_arithr(s, ARITH_AND + rexw, inout, scratch);
+
+        if (vmask > 0xffffffffu) {
+            tcg_out_movi(s, type, scratch, vmask);
+            tgen_arithr(s, ARITH_AND + P_REXW, scratch, val);
+        } else {
+            if (val_scratch) {
+                scratch = val;
+            } else {
+                tcg_out_mov(s, TCG_TYPE_I32, scratch, val);
+            }
+            tgen_arithi(s, ARITH_AND, scratch, vmask, 0);
+        }
+
+        tcg_out_shifti(s, SHIFT_SHL + P_REXW, scratch, ofs);
+        tgen_arithr(s, ARITH_OR + P_REXW, inout, scratch);
+        return;
+    }
+
+    /* Both IMASK and VMASK are valid immediate operands, which means that
+       VAL may be treated as a 32-bit value.  */
+    tcg_out_mov(s, TCG_TYPE_I32, scratch, val);
+    tgen_arithi(s, ARITH_AND, scratch, vmask, 0);
+    tcg_out_shifti(s, SHIFT_SHL + rexw, scratch, ofs);
+
+    tgen_arithi(s, ARITH_AND + rexw, inout, imask, 0);
+    tgen_arithr(s, ARITH_OR + rexw, inout, scratch);
+}
+
+
 /* Use SMALL != 0 to force a short forward branch. */
 static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
 {
@@ -1266,7 +1388,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
 
     switch (sizeop) {
     case 0:
-        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
+        tcg_out_modrm_offset(s, OPC_MOVB_EbGb + P_REXB_R, datalo, base, ofs);
         break;
     case 1:
         if (bswap) {
@@ -1504,7 +1626,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     OP_32_64(st8):
-        tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
+        tcg_out_modrm_offset(s, OPC_MOVB_EbGb | P_REXB_R,
                              args[0], args[1], args[2]);
         break;
     OP_32_64(st16):
@@ -1603,6 +1725,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         }
         break;
 
+    OP_32_64(deposit):
+        tcg_out_deposit(s, args[0], args[2], args[3], args[4], rexw);
+        break;
+
     case INDEX_op_brcond_i32:
         tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                          args[3], 0);
@@ -1783,6 +1909,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_sar_i32, { "r", "0", "ci" } },
     { INDEX_op_rotl_i32, { "r", "0", "ci" } },
     { INDEX_op_rotr_i32, { "r", "0", "ci" } },
+    { INDEX_op_deposit_i32, { "r", "0", "r" } },
 
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
@@ -1835,6 +1962,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_sar_i64, { "r", "0", "ci" } },
     { INDEX_op_rotl_i64, { "r", "0", "ci" } },
     { INDEX_op_rotr_i64, { "r", "0", "ci" } },
+    { INDEX_op_deposit_i64, { "r", "0", "r" } },
 
     { INDEX_op_brcond_i64, { "r", "re" } },
     { INDEX_op_setcond_i64, { "r", "r", "re" } },
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index bfafbfc..9f90d17 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -77,6 +77,7 @@ enum {
 /* optional instructions */
 #define TCG_TARGET_HAS_div2_i32
 #define TCG_TARGET_HAS_rot_i32
+#define TCG_TARGET_HAS_deposit_i32
 #define TCG_TARGET_HAS_ext8s_i32
 #define TCG_TARGET_HAS_ext16s_i32
 #define TCG_TARGET_HAS_ext8u_i32
@@ -94,6 +95,7 @@ enum {
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div2_i64
 #define TCG_TARGET_HAS_rot_i64
+#define TCG_TARGET_HAS_deposit_i64
 #define TCG_TARGET_HAS_ext8s_i64
 #define TCG_TARGET_HAS_ext16s_i64
 #define TCG_TARGET_HAS_ext32s_i64