Re: [Qemu-devel] [PATCH v3 16/21] target-mips: add new Floating Point instructions

From: Yongbok Kim <yongbok.kim@imgtec.com>
To: Leon Alrae <leon.alrae@imgtec.com>, qemu-devel@nongnu.org
Cc: aurelien@aurel32.net, rth@twiddle.net
Subject: Re: [Qemu-devel] [PATCH v3 16/21] target-mips: add new Floating Point instructions
Date: Thu, 2 Oct 2014 17:10:38 +0100	[thread overview]
Message-ID: <542D78FE.2000901@imgtec.com> (raw)
In-Reply-To: <1403882530-47821-17-git-send-email-leon.alrae@imgtec.com>

Hi,

Overall this looks good, but there are a few issues: some TCG temporaries
are not freed, and there are some style problems which fail the
checkpatch.pl script.

Otherwise
Reviewed-by: Yongbok Kim <yongbok.kim@imgtec.com>

Regards,
Yongbok

On 27/06/2014 16:22, Leon Alrae wrote:
> In terms of encoding MIPS32R6 MIN.fmt, MAX.fmt, MINA.fmt, MAXA.fmt replaced
> MIPS-3D RECIP1, RECIP2, RSQRT1, RSQRT2 instructions.
>
> In R6 all Floating Point instructions are supposed to be IEEE-2008 compliant
> i.e. FIR.HAS2008 always 1. However, QEMU softfloat for MIPS has not been
> updated yet.
>
> Signed-off-by: Leon Alrae <leon.alrae@imgtec.com>
> ---
> v3:
> * use FOP_PROTO for new instructions and create FLOAT_RINT macro to be
>    consistent
> * use TCG_CALL_NO_RWG_SE flag for float_class helper
> ---
>   disas/mips.c            |   22 +++
>   target-mips/helper.h    |   20 ++
>   target-mips/op_helper.c |  104 +++++++++++
>   target-mips/translate.c |  449 ++++++++++++++++++++++++++++++++++++++++++-----
>   4 files changed, 547 insertions(+), 48 deletions(-)
>
> diff --git a/disas/mips.c b/disas/mips.c
> index e041858..6196d2e 100644
> --- a/disas/mips.c
> +++ b/disas/mips.c
> @@ -1263,6 +1263,28 @@ const struct mips_opcode mips_builtin_opcodes[] =
>   {"cache",   "k,o(b)",   0x7c000025, 0xfc00003f, RD_b,                 0, I32R6},
>   {"seleqz",  "d,v,t",    0x00000035, 0xfc0007ff, WR_d|RD_s|RD_t,       0, I32R6},
>   {"selnez",  "d,v,t",    0x00000037, 0xfc0007ff, WR_d|RD_s|RD_t,       0, I32R6},
> +{"maddf.s", "D,S,T",    0x46000018, 0xffe0001f, WR_D|RD_S|RD_T|FP_S,  0, I32R6},
> +{"maddf.d", "D,S,T",    0x46200018, 0xffe0001f, WR_D|RD_S|RD_T|FP_D,  0, I32R6},
> +{"msubf.s", "D,S,T",    0x46000019, 0xffe0001f, WR_D|RD_S|RD_T|FP_S,  0, I32R6},
> +{"msubf.d", "D,S,T",    0x46200019, 0xffe0001f, WR_D|RD_S|RD_T|FP_D,  0, I32R6},
> +{"max.s",   "D,S,T",    0x4600001e, 0xffe0001f, WR_D|RD_S|RD_T|FP_S,  0, I32R6},
> +{"max.d",   "D,S,T",    0x4620001e, 0xffe0001f, WR_D|RD_S|RD_T|FP_D,  0, I32R6},
> +{"maxa.s",  "D,S,T",    0x4600001f, 0xffe0001f, WR_D|RD_S|RD_T|FP_S,  0, I32R6},
> +{"maxa.d",  "D,S,T",    0x4620001f, 0xffe0001f, WR_D|RD_S|RD_T|FP_D,  0, I32R6},
> +{"rint.s",  "D,S",      0x4600001a, 0xffe0001f, WR_D|RD_S|FP_S,       0, I32R6},
> +{"rint.d",  "D,S",      0x4620001a, 0xffe0001f, WR_D|RD_S|FP_D,       0, I32R6},
> +{"class.s", "D,S",      0x4600001b, 0xffe0001f, WR_D|RD_S|FP_S,       0, I32R6},
> +{"class.d", "D,S",      0x4620001b, 0xffe0001f, WR_D|RD_S|FP_D,       0, I32R6},
> +{"min.s",   "D,S,T",    0x4600001c, 0xffe0001f, WR_D|RD_S|RD_T|FP_S,  0, I32R6},
> +{"min.d",   "D,S,T",    0x4620001c, 0xffe0001f, WR_D|RD_S|RD_T|FP_D,  0, I32R6},
> +{"mina.s",  "D,S,T",    0x4600001d, 0xffe0001f, WR_D|RD_S|RD_T|FP_S,  0, I32R6},
> +{"mina.d",  "D,S,T",    0x4620001d, 0xffe0001f, WR_D|RD_S|RD_T|FP_D,  0, I32R6},
> +{"sel.s",   "D,S,T",    0x46000010, 0xffe0001f, WR_D|RD_S|RD_T|FP_S,  0, I32R6},
> +{"sel.d",   "D,S,T",    0x46200010, 0xffe0001f, WR_D|RD_S|RD_T|FP_D,  0, I32R6},
> +{"seleqz.s", "D,S,T",   0x46000014, 0xffe0001f, WR_D|RD_S|RD_T|FP_S,  0, I32R6},
> +{"seleqz.d", "D,S,T",   0x46200014, 0xffe0001f, WR_D|RD_S|RD_T|FP_D,  0, I32R6},
> +{"selnez.s", "D,S,T",   0x46000017, 0xffe0001f, WR_D|RD_S|RD_T|FP_S,  0, I32R6},
> +{"selnez.d", "D,S,T",   0x46200017, 0xffe0001f, WR_D|RD_S|RD_T|FP_D,  0, I32R6},
>   {"align",   "d,v,t",    0x7c000220, 0xfc00073f, WR_d|RD_s|RD_t,       0, I32R6},
>   {"dalign",  "d,v,t",    0x7c000224, 0xfc00063f, WR_d|RD_s|RD_t,       0, I64R6},
>   {"bitswap", "d,w",      0x7c000020, 0xffe007ff, WR_d|RD_t,            0, I32R6},
> diff --git a/target-mips/helper.h b/target-mips/helper.h
> index 5511dfc..9020c7b 100644
> --- a/target-mips/helper.h
> +++ b/target-mips/helper.h
> @@ -202,6 +202,25 @@ DEF_HELPER_2(float_cvtw_d, i32, env, i64)
>   DEF_HELPER_3(float_addr_ps, i64, env, i64, i64)
>   DEF_HELPER_3(float_mulr_ps, i64, env, i64, i64)
>   
> +DEF_HELPER_FLAGS_1(float_class_s, TCG_CALL_NO_RWG_SE, i32, i32)
> +DEF_HELPER_FLAGS_1(float_class_d, TCG_CALL_NO_RWG_SE, i64, i64)
> +
> +#define FOP_PROTO(op)                                     \
> +DEF_HELPER_4(float_ ## op ## _s, i32, env, i32, i32, i32) \
> +DEF_HELPER_4(float_ ## op ## _d, i64, env, i64, i64, i64)
> +FOP_PROTO(maddf)
> +FOP_PROTO(msubf)
> +#undef FOP_PROTO
> +
> +#define FOP_PROTO(op)                                \
> +DEF_HELPER_3(float_ ## op ## _s, i32, env, i32, i32) \
> +DEF_HELPER_3(float_ ## op ## _d, i64, env, i64, i64)
> +FOP_PROTO(max)
> +FOP_PROTO(maxa)
> +FOP_PROTO(min)
> +FOP_PROTO(mina)
> +#undef FOP_PROTO
> +
>   #define FOP_PROTO(op)                            \
>   DEF_HELPER_2(float_ ## op ## l_s, i64, env, i32) \
>   DEF_HELPER_2(float_ ## op ## l_d, i64, env, i64) \
> @@ -219,6 +238,7 @@ DEF_HELPER_2(float_ ## op ## _d, i64, env, i64)
>   FOP_PROTO(sqrt)
>   FOP_PROTO(rsqrt)
>   FOP_PROTO(recip)
> +FOP_PROTO(rint)
>   #undef FOP_PROTO
>   
>   #define FOP_PROTO(op)                       \
> diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
> index e8853f7..6a63e5a 100644
> --- a/target-mips/op_helper.c
> +++ b/target-mips/op_helper.c
> @@ -2798,6 +2798,110 @@ FLOAT_UNOP(abs)
>   FLOAT_UNOP(chs)
>   #undef FLOAT_UNOP
>   
> +#define FLOAT_FMADDSUB(name, bits, muladd_arg)                          \
> +uint ## bits ## _t helper_float_ ## name (CPUMIPSState *env,            \
> +                                          uint ## bits ## _t fs,        \
> +                                          uint ## bits ## _t ft,        \
> +                                          uint ## bits ## _t fd)        \
> +{                                                                       \
> +    uint ## bits ## _t fdret;                                           \
> +                                                                        \
> +    fdret = float ## bits ## _muladd(fs, ft, fd, muladd_arg,            \
> +                                     &env->active_fpu.fp_status);       \
> +    update_fcr31(env, GETPC());                                         \
> +    return fdret;                                                       \
> +}
> +
> +FLOAT_FMADDSUB(maddf_s, 32, 0)
> +FLOAT_FMADDSUB(maddf_d, 64, 0)
> +FLOAT_FMADDSUB(msubf_s, 32, float_muladd_negate_product)
> +FLOAT_FMADDSUB(msubf_d, 64, float_muladd_negate_product)
> +#undef FLOAT_FMADDSUB
> +
> +#define FLOAT_MINMAX(name, bits, minmaxfunc)                            \
> +uint ## bits ## _t helper_float_ ## name (CPUMIPSState *env,            \
> +                                          uint ## bits ## _t fs,        \
> +                                          uint ## bits ## _t ft)        \
> +{                                                                       \
> +    uint ## bits ## _t fdret;                                           \
> +                                                                        \
> +    fdret = float ## bits ## _ ## minmaxfunc(fs, ft,                    \
> +                                           &env->active_fpu.fp_status); \
> +    update_fcr31(env, GETPC());                                         \
> +    return fdret;                                                       \
> +}
> +
> +FLOAT_MINMAX(max_s, 32, maxnum)
> +FLOAT_MINMAX(max_d, 64, maxnum)
> +FLOAT_MINMAX(maxa_s, 32, maxnummag)
> +FLOAT_MINMAX(maxa_d, 64, maxnummag)
> +
> +FLOAT_MINMAX(min_s, 32, minnum)
> +FLOAT_MINMAX(min_d, 64, minnum)
> +FLOAT_MINMAX(mina_s, 32, minnummag)
> +FLOAT_MINMAX(mina_d, 64, minnummag)
> +#undef FLOAT_MINMAX
> +
> +#define FLOAT_RINT(name, bits)                                              \
> +uint ## bits ## _t helper_float_ ## name (CPUMIPSState *env,                \
> +                                          uint ## bits ## _t fs)            \
> +{                                                                           \
> +    uint ## bits ## _t fdret;                                               \
> +                                                                            \
> +    fdret = float ## bits ## _round_to_int(fs, &env->active_fpu.fp_status); \
> +    update_fcr31(env, GETPC());                                             \
> +    return fdret;                                                           \
> +}
> +
> +FLOAT_RINT(rint_s, 32)
> +FLOAT_RINT(rint_d, 64)
> +#undef FLOAT_RINT
> +
> +#define FLOAT_CLASS_SIGNALING_NAN      0x001
> +#define FLOAT_CLASS_QUIET_NAN          0x002
> +#define FLOAT_CLASS_NEGATIVE_INFINITY  0x004
> +#define FLOAT_CLASS_NEGATIVE_NORMAL    0x008
> +#define FLOAT_CLASS_NEGATIVE_SUBNORMAL 0x010
> +#define FLOAT_CLASS_NEGATIVE_ZERO      0x020
> +#define FLOAT_CLASS_POSITIVE_INFINITY  0x040
> +#define FLOAT_CLASS_POSITIVE_NORMAL    0x080
> +#define FLOAT_CLASS_POSITIVE_SUBNORMAL 0x100
> +#define FLOAT_CLASS_POSITIVE_ZERO      0x200
> +
> +#define FLOAT_CLASS(name, bits)                                      \
> +uint ## bits ## _t helper_float_ ## name (uint ## bits ## _t arg)    \
> +{                                                                    \
> +    if (float ## bits ## _is_signaling_nan(arg)) {                   \
> +        return FLOAT_CLASS_SIGNALING_NAN;                            \
> +    } else if (float ## bits ## _is_quiet_nan(arg)) {                \
> +        return FLOAT_CLASS_QUIET_NAN;                                \
> +    } else if (float ## bits ## _is_neg(arg)) {                      \
> +        if (float ## bits ## _is_infinity(arg)) {                    \
> +            return FLOAT_CLASS_NEGATIVE_INFINITY;                    \
> +        } else if (float ## bits ## _is_zero(arg)) {                 \
> +            return FLOAT_CLASS_NEGATIVE_ZERO;                        \
> +        } else if (float ## bits ## _is_zero_or_denormal(arg)) {     \
> +            return FLOAT_CLASS_NEGATIVE_SUBNORMAL;                   \
> +        } else {                                                     \
> +            return FLOAT_CLASS_NEGATIVE_NORMAL;                      \
> +        }                                                            \
> +    } else {                                                         \
> +        if (float ## bits ## _is_infinity(arg)) {                    \
> +            return FLOAT_CLASS_POSITIVE_INFINITY;                    \
> +        } else if (float ## bits ## _is_zero(arg)) {                 \
> +            return FLOAT_CLASS_POSITIVE_ZERO;                        \
> +        } else if (float ## bits ## _is_zero_or_denormal(arg)) {     \
> +            return FLOAT_CLASS_POSITIVE_SUBNORMAL;                   \
> +        } else {                                                     \
> +            return FLOAT_CLASS_POSITIVE_NORMAL;                      \
> +        }                                                            \
> +    }                                                                \
> +}
> +
> +FLOAT_CLASS(class_s, 32)
> +FLOAT_CLASS(class_d, 64)
> +#undef FLOAT_CLASS
> +
>   /* MIPS specific unary operations */
>   uint64_t helper_float_recip_d(CPUMIPSState *env, uint64_t fdt0)
>   {
> diff --git a/target-mips/translate.c b/target-mips/translate.c
> index 188caf7..856a4b2 100644
> --- a/target-mips/translate.c
> +++ b/target-mips/translate.c
> @@ -7646,14 +7646,25 @@ enum fopcode {
>       OPC_TRUNC_W_S = FOP(13, FMT_S),
>       OPC_CEIL_W_S = FOP(14, FMT_S),
>       OPC_FLOOR_W_S = FOP(15, FMT_S),
> +    OPC_SEL_S = FOP(16, FMT_S),
>       OPC_MOVCF_S = FOP(17, FMT_S),
>       OPC_MOVZ_S = FOP(18, FMT_S),
>       OPC_MOVN_S = FOP(19, FMT_S),
> +    OPC_SELEQZ_S = FOP(20, FMT_S),
>       OPC_RECIP_S = FOP(21, FMT_S),
>       OPC_RSQRT_S = FOP(22, FMT_S),
> +    OPC_SELNEZ_S = FOP(23, FMT_S),
> +    OPC_MADDF_S = FOP(24, FMT_S),
> +    OPC_MSUBF_S = FOP(25, FMT_S),
> +    OPC_RINT_S = FOP(26, FMT_S),
> +    OPC_CLASS_S = FOP(27, FMT_S),
> +    OPC_MIN_S = FOP(28, FMT_S),
>       OPC_RECIP2_S = FOP(28, FMT_S),
> +    OPC_MINA_S = FOP(29, FMT_S),
>       OPC_RECIP1_S = FOP(29, FMT_S),
> +    OPC_MAX_S = FOP(30, FMT_S),
>       OPC_RSQRT1_S = FOP(30, FMT_S),
> +    OPC_MAXA_S = FOP(31, FMT_S),
>       OPC_RSQRT2_S = FOP(31, FMT_S),
>       OPC_CVT_D_S = FOP(33, FMT_S),
>       OPC_CVT_W_S = FOP(36, FMT_S),
> @@ -7692,14 +7703,25 @@ enum fopcode {
>       OPC_TRUNC_W_D = FOP(13, FMT_D),
>       OPC_CEIL_W_D = FOP(14, FMT_D),
>       OPC_FLOOR_W_D = FOP(15, FMT_D),
> +    OPC_SEL_D = FOP(16, FMT_D),
>       OPC_MOVCF_D = FOP(17, FMT_D),
>       OPC_MOVZ_D = FOP(18, FMT_D),
>       OPC_MOVN_D = FOP(19, FMT_D),
> +    OPC_SELEQZ_D = FOP(20, FMT_D),
>       OPC_RECIP_D = FOP(21, FMT_D),
>       OPC_RSQRT_D = FOP(22, FMT_D),
> +    OPC_SELNEZ_D = FOP(23, FMT_D),
> +    OPC_MADDF_D = FOP(24, FMT_D),
> +    OPC_MSUBF_D = FOP(25, FMT_D),
> +    OPC_RINT_D = FOP(26, FMT_D),
> +    OPC_CLASS_D = FOP(27, FMT_D),
> +    OPC_MIN_D = FOP(28, FMT_D),
>       OPC_RECIP2_D = FOP(28, FMT_D),
> +    OPC_MINA_D = FOP(29, FMT_D),
>       OPC_RECIP1_D = FOP(29, FMT_D),
> +    OPC_MAX_D = FOP(30, FMT_D),
>       OPC_RSQRT1_D = FOP(30, FMT_D),
> +    OPC_MAXA_D = FOP(31, FMT_D),
>       OPC_RSQRT2_D = FOP(31, FMT_D),
>       OPC_CVT_S_D = FOP(32, FMT_D),
>       OPC_CVT_W_D = FOP(36, FMT_D),
> @@ -7955,6 +7977,79 @@ static inline void gen_movcf_ps(DisasContext *ctx, int fs, int fd,
>       gen_set_label(l2);
>   }
>   
> +static void gen_sel_s (DisasContext *ctx, enum fopcode op1, int fd, int ft,
There should be no space between the function name and the opening parenthesis.

> +                       int fs)
> +{
> +    TCGv_i32 t1 = tcg_const_i32(0);
> +    TCGv_i32 fp0 = tcg_temp_new_i32();
> +    TCGv_i32 fp1 = tcg_temp_new_i32();
> +    TCGv_i32 fp2 = tcg_temp_new_i32();
> +    gen_load_fpr32(fp0, fd);
> +    gen_load_fpr32(fp1, ft);
> +    gen_load_fpr32(fp2, fs);
> +
> +    switch (op1) {
> +    case OPC_SEL_S:
> +        tcg_gen_andi_i32(fp0, fp0, 1);
> +        tcg_gen_movcond_i32(TCG_COND_NE, fp0, fp0, t1, fp1, fp2);
> +        break;
> +    case OPC_SELEQZ_S:
> +        tcg_gen_andi_i32(fp1, fp1, 1);
> +        tcg_gen_movcond_i32(TCG_COND_EQ, fp0, fp1, t1, fp2, t1);
> +        break;
> +    case OPC_SELNEZ_S:
> +        tcg_gen_andi_i32(fp1, fp1, 1);
> +        tcg_gen_movcond_i32(TCG_COND_NE, fp0, fp1, t1, fp2, t1);
> +        break;
> +    default:
> +        MIPS_INVAL("gen_sel_s");
> +        generate_exception (ctx, EXCP_RI);
It shouldn't return here; t1, fp0, fp1 and fp2 still need to be freed.
> +        return;
> +    }
> +
> +    gen_store_fpr32(fp0, fd);
> +    tcg_temp_free_i32(fp2);
> +    tcg_temp_free_i32(fp1);
> +    tcg_temp_free_i32(fp0);
> +    tcg_temp_free_i32(t1);
> +}
> +
> +static void gen_sel_d (DisasContext *ctx, enum fopcode op1, int fd, int ft,
> +                       int fs)
> +{
> +    TCGv_i64 t1 = tcg_const_i64(0);
> +    TCGv_i64 fp0 = tcg_temp_new_i64();
> +    TCGv_i64 fp1 = tcg_temp_new_i64();
> +    TCGv_i64 fp2 = tcg_temp_new_i64();
> +    gen_load_fpr64(ctx, fp0, fd);
> +    gen_load_fpr64(ctx, fp1, ft);
> +    gen_load_fpr64(ctx, fp2, fs);
> +
> +    switch (op1) {
> +    case OPC_SEL_D:
> +        tcg_gen_andi_i64(fp0, fp0, 1);
> +        tcg_gen_movcond_i64(TCG_COND_NE, fp0, fp0, t1, fp1, fp2);
> +        break;
> +    case OPC_SELEQZ_D:
> +        tcg_gen_andi_i64(fp1, fp1, 1);
> +        tcg_gen_movcond_i64(TCG_COND_EQ, fp0, fp1, t1, fp2, t1);
> +        break;
> +    case OPC_SELNEZ_D:
> +        tcg_gen_andi_i64(fp1, fp1, 1);
> +        tcg_gen_movcond_i64(TCG_COND_NE, fp0, fp1, t1, fp2, t1);
> +        break;
> +    default:
> +        MIPS_INVAL("gen_sel_d");
> +        generate_exception (ctx, EXCP_RI);
> +        return;
It shouldn't return here either; t1, fp0, fp1 and fp2 still need to be freed.
> +    }
> +
> +    gen_store_fpr64(ctx, fp0, fd);
> +    tcg_temp_free_i64(fp2);
> +    tcg_temp_free_i64(fp1);
> +    tcg_temp_free_i64(fp0);
> +    tcg_temp_free_i64(t1);
> +}
>   
>   static void gen_farith (DisasContext *ctx, enum fopcode op1,
>                           int ft, int fs, int fd, int cc)
> @@ -8203,6 +8298,21 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
>           }
>           opn = "floor.w.s";
>           break;
> +    case OPC_SEL_S:
> +        check_insn(ctx, ISA_MIPS32R6);
> +        gen_sel_s(ctx, op1, fd, ft, fs);
> +        opn = "sel.s";
> +        break;
> +    case OPC_SELEQZ_S:
> +        check_insn(ctx, ISA_MIPS32R6);
> +        gen_sel_s(ctx, op1, fd, ft, fs);
> +        opn = "seleqz.s";
> +        break;
> +    case OPC_SELNEZ_S:
> +        check_insn(ctx, ISA_MIPS32R6);
> +        gen_sel_s(ctx, op1, fd, ft, fs);
> +        opn = "selnez.s";
> +        break;
>       case OPC_MOVCF_S:
>           check_insn_opc_removed(ctx, ISA_MIPS32R6);
>           gen_movcf_s(fs, fd, (ft >> 2) & 0x7, ft & 0x1);
> @@ -8266,59 +8376,175 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
>           }
>           opn = "rsqrt.s";
>           break;
> -    case OPC_RECIP2_S:
> -        check_cp1_64bitmode(ctx);
> +    case OPC_MADDF_S:
> +        check_insn(ctx, ISA_MIPS32R6);
>           {
>               TCGv_i32 fp0 = tcg_temp_new_i32();
>               TCGv_i32 fp1 = tcg_temp_new_i32();
> -
> +            TCGv_i32 fp2 = tcg_temp_new_i32();
>               gen_load_fpr32(fp0, fs);
>               gen_load_fpr32(fp1, ft);
> -            gen_helper_float_recip2_s(fp0, cpu_env, fp0, fp1);
> +            gen_load_fpr32(fp2, fd);
> +            gen_helper_float_maddf_s(fp2, cpu_env, fp0, fp1, fp2);
> +            gen_store_fpr32(fp2, fd);
> +            tcg_temp_free_i32(fp2);
>               tcg_temp_free_i32(fp1);
> -            gen_store_fpr32(fp0, fd);
>               tcg_temp_free_i32(fp0);
> +            opn = "maddf.s";
>           }
> -        opn = "recip2.s";
> -        break;
> -    case OPC_RECIP1_S:
> -        check_cp1_64bitmode(ctx);
> +    break;
Indentation :) -- this "break;" should be indented to match the case body.
> +    case OPC_MSUBF_S:
> +        check_insn(ctx, ISA_MIPS32R6);
> +        {
> +            TCGv_i32 fp0 = tcg_temp_new_i32();
> +            TCGv_i32 fp1 = tcg_temp_new_i32();
> +            TCGv_i32 fp2 = tcg_temp_new_i32();
> +            gen_load_fpr32(fp0, fs);
> +            gen_load_fpr32(fp1, ft);
> +            gen_load_fpr32(fp2, fd);
> +            gen_helper_float_msubf_s(fp2, cpu_env, fp0, fp1, fp2);
> +            gen_store_fpr32(fp2, fd);
> +            tcg_temp_free_i32(fp2);
> +            tcg_temp_free_i32(fp1);
> +            tcg_temp_free_i32(fp0);
> +            opn = "msubf.s";
> +        }
> +    break;
> +    case OPC_RINT_S:
> +        check_insn(ctx, ISA_MIPS32R6);
>           {
>               TCGv_i32 fp0 = tcg_temp_new_i32();
> -
>               gen_load_fpr32(fp0, fs);
> -            gen_helper_float_recip1_s(fp0, cpu_env, fp0);
> +            gen_helper_float_rint_s(fp0, cpu_env, fp0);
>               gen_store_fpr32(fp0, fd);
>               tcg_temp_free_i32(fp0);
> +            opn = "rint.s";
>           }
> -        opn = "recip1.s";
> -        break;
> -    case OPC_RSQRT1_S:
> -        check_cp1_64bitmode(ctx);
> +    break;
> +    case OPC_CLASS_S:
> +        check_insn(ctx, ISA_MIPS32R6);
>           {
>               TCGv_i32 fp0 = tcg_temp_new_i32();
> -
>               gen_load_fpr32(fp0, fs);
> -            gen_helper_float_rsqrt1_s(fp0, cpu_env, fp0);
> +            gen_helper_float_class_s(fp0, fp0);
>               gen_store_fpr32(fp0, fd);
>               tcg_temp_free_i32(fp0);
> +            opn = "class.s";
> +        }
> +    break;
> +    case OPC_MIN_S: /* OPC_RECIP2_S */
> +        if (ctx->insn_flags & ISA_MIPS32R6) {
> +            /* OPC_MIN_S */
> +            TCGv_i32 fp0 = tcg_temp_new_i32();
> +            TCGv_i32 fp1 = tcg_temp_new_i32();
> +            TCGv_i32 fp2 = tcg_temp_new_i32();
> +            gen_load_fpr32(fp0, fs);
> +            gen_load_fpr32(fp1, ft);
> +            gen_helper_float_min_s(fp2, cpu_env, fp0, fp1);
> +            gen_store_fpr32(fp2, fd);
> +            tcg_temp_free_i32(fp2);
> +            tcg_temp_free_i32(fp1);
> +            tcg_temp_free_i32(fp0);
> +            opn = "min.s";
> +        } else {
> +            /* OPC_RECIP2_S */
> +            check_cp1_64bitmode(ctx);
> +            {
> +                TCGv_i32 fp0 = tcg_temp_new_i32();
> +                TCGv_i32 fp1 = tcg_temp_new_i32();
> +
> +                gen_load_fpr32(fp0, fs);
> +                gen_load_fpr32(fp1, ft);
> +                gen_helper_float_recip2_s(fp0, cpu_env, fp0, fp1);
> +                tcg_temp_free_i32(fp1);
> +                gen_store_fpr32(fp0, fd);
> +                tcg_temp_free_i32(fp0);
> +            }
> +            opn = "recip2.s";
>           }
> -        opn = "rsqrt1.s";
>           break;
> -    case OPC_RSQRT2_S:
> -        check_cp1_64bitmode(ctx);
> -        {
> +    case OPC_MINA_S: /* OPC_RECIP1_S */
> +        if (ctx->insn_flags & ISA_MIPS32R6) {
> +            /* OPC_MINA_S */
>               TCGv_i32 fp0 = tcg_temp_new_i32();
>               TCGv_i32 fp1 = tcg_temp_new_i32();
> +            TCGv_i32 fp2 = tcg_temp_new_i32();
> +            gen_load_fpr32(fp0, fs);
> +            gen_load_fpr32(fp1, ft);
> +            gen_helper_float_mina_s(fp2, cpu_env, fp0, fp1);
> +            gen_store_fpr32(fp2, fd);
> +            tcg_temp_free_i32(fp2);
> +            tcg_temp_free_i32(fp1);
> +            tcg_temp_free_i32(fp0);
> +            opn = "mina.s";
> +        } else {
> +            /* OPC_RECIP1_S */
> +            check_cp1_64bitmode(ctx);
> +            {
> +                TCGv_i32 fp0 = tcg_temp_new_i32();
>   
> +                gen_load_fpr32(fp0, fs);
> +                gen_helper_float_recip1_s(fp0, cpu_env, fp0);
> +                gen_store_fpr32(fp0, fd);
> +                tcg_temp_free_i32(fp0);
> +            }
> +            opn = "recip1.s";
> +        }
> +        break;
> +    case OPC_MAX_S: /* OPC_RSQRT1_S */
> +        if (ctx->insn_flags & ISA_MIPS32R6) {
> +            /* OPC_MAX_S */
> +            TCGv_i32 fp0 = tcg_temp_new_i32();
> +            TCGv_i32 fp1 = tcg_temp_new_i32();
>               gen_load_fpr32(fp0, fs);
>               gen_load_fpr32(fp1, ft);
> -            gen_helper_float_rsqrt2_s(fp0, cpu_env, fp0, fp1);
> +            gen_helper_float_max_s(fp1, cpu_env, fp0, fp1);
> +            gen_store_fpr32(fp1, fd);
> +            tcg_temp_free_i32(fp1);
> +            tcg_temp_free_i32(fp0);
> +            opn = "max.s";
> +        } else {
> +            /* OPC_RSQRT1_S */
> +            check_cp1_64bitmode(ctx);
> +            {
> +                TCGv_i32 fp0 = tcg_temp_new_i32();
> +
> +                gen_load_fpr32(fp0, fs);
> +                gen_helper_float_rsqrt1_s(fp0, cpu_env, fp0);
> +                gen_store_fpr32(fp0, fd);
> +                tcg_temp_free_i32(fp0);
> +            }
> +            opn = "rsqrt1.s";
> +        }
> +        break;
> +    case OPC_MAXA_S: /* OPC_RSQRT2_S */
> +        if (ctx->insn_flags & ISA_MIPS32R6) {
> +            /* OPC_MAXA_S */
> +            TCGv_i32 fp0 = tcg_temp_new_i32();
> +            TCGv_i32 fp1 = tcg_temp_new_i32();
> +            gen_load_fpr32(fp0, fs);
> +            gen_load_fpr32(fp1, ft);
> +            gen_helper_float_maxa_s(fp1, cpu_env, fp0, fp1);
> +            gen_store_fpr32(fp1, fd);
>               tcg_temp_free_i32(fp1);
> -            gen_store_fpr32(fp0, fd);
>               tcg_temp_free_i32(fp0);
> +            opn = "maxa.s";
> +        } else {
> +            /* OPC_RSQRT2_S */
> +            check_cp1_64bitmode(ctx);
> +            {
> +                TCGv_i32 fp0 = tcg_temp_new_i32();
> +                TCGv_i32 fp1 = tcg_temp_new_i32();
> +
> +                gen_load_fpr32(fp0, fs);
> +                gen_load_fpr32(fp1, ft);
> +                gen_helper_float_rsqrt2_s(fp0, cpu_env, fp0, fp1);
> +                tcg_temp_free_i32(fp1);
> +                gen_store_fpr32(fp0, fd);
> +                tcg_temp_free_i32(fp0);
> +            }
> +            opn = "rsqrt2.s";
>           }
> -        opn = "rsqrt2.s";
>           break;
>       case OPC_CVT_D_S:
>           check_cp1_registers(ctx, fd);
> @@ -8617,6 +8843,21 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
>           }
>           opn = "floor.w.d";
>           break;
> +    case OPC_SEL_D:
> +        check_insn(ctx, ISA_MIPS32R6);
> +        gen_sel_d(ctx, op1, fd, ft, fs);
> +        opn = "sel.d";
> +        break;
> +    case OPC_SELEQZ_D:
> +        check_insn(ctx, ISA_MIPS32R6);
> +        gen_sel_d(ctx, op1, fd, ft, fs);
> +        opn = "seleqz.d";
> +        break;
> +    case OPC_SELNEZ_D:
> +        check_insn(ctx, ISA_MIPS32R6);
> +        gen_sel_d(ctx, op1, fd, ft, fs);
> +        opn = "selnez.d";
> +        break;
>       case OPC_MOVCF_D:
>           check_insn_opc_removed(ctx, ISA_MIPS32R6);
>           gen_movcf_d(ctx, fs, fd, (ft >> 2) & 0x7, ft & 0x1);
> @@ -8680,59 +8921,171 @@ static void gen_farith (DisasContext *ctx, enum fopcode op1,
>           }
>           opn = "rsqrt.d";
>           break;
> -    case OPC_RECIP2_D:
> -        check_cp1_64bitmode(ctx);
> +    case OPC_MADDF_D:
> +        check_insn(ctx, ISA_MIPS32R6);
>           {
>               TCGv_i64 fp0 = tcg_temp_new_i64();
>               TCGv_i64 fp1 = tcg_temp_new_i64();
> -
> +            TCGv_i64 fp2 = tcg_temp_new_i64();
>               gen_load_fpr64(ctx, fp0, fs);
>               gen_load_fpr64(ctx, fp1, ft);
> -            gen_helper_float_recip2_d(fp0, cpu_env, fp0, fp1);
> +            gen_load_fpr64(ctx, fp2, fd);
> +            gen_helper_float_maddf_d(fp2, cpu_env, fp0, fp1, fp2);
> +            gen_store_fpr64(ctx, fp2, fd);
> +            tcg_temp_free_i64(fp2);
>               tcg_temp_free_i64(fp1);
> -            gen_store_fpr64(ctx, fp0, fd);
>               tcg_temp_free_i64(fp0);
> +            opn = "maddf.d";
>           }
> -        opn = "recip2.d";
> -        break;
> -    case OPC_RECIP1_D:
> -        check_cp1_64bitmode(ctx);
> +    break;
> +    case OPC_MSUBF_D:
> +        check_insn(ctx, ISA_MIPS32R6);
>           {
>               TCGv_i64 fp0 = tcg_temp_new_i64();
> -
> +            TCGv_i64 fp1 = tcg_temp_new_i64();
> +            TCGv_i64 fp2 = tcg_temp_new_i64();
>               gen_load_fpr64(ctx, fp0, fs);
> -            gen_helper_float_recip1_d(fp0, cpu_env, fp0);
> +            gen_load_fpr64(ctx, fp1, ft);
> +            gen_load_fpr64(ctx, fp2, fd);
> +            gen_helper_float_msubf_d(fp2, cpu_env, fp0, fp1, fp2);
> +            gen_store_fpr64(ctx, fp2, fd);
> +            tcg_temp_free_i64(fp2);
> +            tcg_temp_free_i64(fp1);
> +            tcg_temp_free_i64(fp0);
> +            opn = "msubf.d";
> +        }
> +    break;
> +    case OPC_RINT_D:
> +        check_insn(ctx, ISA_MIPS32R6);
> +        {
> +            TCGv_i64 fp0 = tcg_temp_new_i64();
> +            gen_load_fpr64(ctx, fp0, fs);
> +            gen_helper_float_rint_d(fp0, cpu_env, fp0);
>               gen_store_fpr64(ctx, fp0, fd);
>               tcg_temp_free_i64(fp0);
> +            opn = "rint.d";
>           }
> -        opn = "recip1.d";
> -        break;
> -    case OPC_RSQRT1_D:
> -        check_cp1_64bitmode(ctx);
> +    break;
> +    case OPC_CLASS_D:
> +        check_insn(ctx, ISA_MIPS32R6);
>           {
>               TCGv_i64 fp0 = tcg_temp_new_i64();
> -
>               gen_load_fpr64(ctx, fp0, fs);
> -            gen_helper_float_rsqrt1_d(fp0, cpu_env, fp0);
> +            gen_helper_float_class_d(fp0, fp0);
>               gen_store_fpr64(ctx, fp0, fd);
>               tcg_temp_free_i64(fp0);
> +            opn = "class.d";
> +        }
> +    break;
> +    case OPC_MIN_D: /* OPC_RECIP2_D */
> +        if (ctx->insn_flags & ISA_MIPS32R6) {
> +            /* OPC_MIN_D */
> +            TCGv_i64 fp0 = tcg_temp_new_i64();
> +            TCGv_i64 fp1 = tcg_temp_new_i64();
> +            gen_load_fpr64(ctx, fp0, fs);
> +            gen_load_fpr64(ctx, fp1, ft);
> +            gen_helper_float_min_d(fp1, cpu_env, fp0, fp1);
> +            gen_store_fpr64(ctx, fp1, fd);
> +            tcg_temp_free_i64(fp1);
> +            tcg_temp_free_i64(fp0);
> +            opn = "min.d";
> +        } else {
> +            /* OPC_RECIP2_D */
> +            check_cp1_64bitmode(ctx);
> +            {
> +                TCGv_i64 fp0 = tcg_temp_new_i64();
> +                TCGv_i64 fp1 = tcg_temp_new_i64();
> +
> +                gen_load_fpr64(ctx, fp0, fs);
> +                gen_load_fpr64(ctx, fp1, ft);
> +                gen_helper_float_recip2_d(fp0, cpu_env, fp0, fp1);
> +                tcg_temp_free_i64(fp1);
> +                gen_store_fpr64(ctx, fp0, fd);
> +                tcg_temp_free_i64(fp0);
> +            }
> +            opn = "recip2.d";
>           }
> -        opn = "rsqrt1.d";
>           break;
> -    case OPC_RSQRT2_D:
> -        check_cp1_64bitmode(ctx);
> -        {
> +    case OPC_MINA_D: /* OPC_RECIP1_D */
> +        if (ctx->insn_flags & ISA_MIPS32R6) {
> +            /* OPC_MINA_D */
> +            TCGv_i64 fp0 = tcg_temp_new_i64();
> +            TCGv_i64 fp1 = tcg_temp_new_i64();
> +            gen_load_fpr64(ctx, fp0, fs);
> +            gen_load_fpr64(ctx, fp1, ft);
> +            gen_helper_float_mina_d(fp1, cpu_env, fp0, fp1);
> +            gen_store_fpr64(ctx, fp1, fd);
> +            tcg_temp_free_i64(fp1);
> +            tcg_temp_free_i64(fp0);
> +            opn = "mina.d";
> +        } else {
> +            /* OPC_RECIP1_D */
> +            check_cp1_64bitmode(ctx);
> +            {
> +                TCGv_i64 fp0 = tcg_temp_new_i64();
> +
> +                gen_load_fpr64(ctx, fp0, fs);
> +                gen_helper_float_recip1_d(fp0, cpu_env, fp0);
> +                gen_store_fpr64(ctx, fp0, fd);
> +                tcg_temp_free_i64(fp0);
> +            }
> +            opn = "recip1.d";
> +        }
> +        break;
> +    case OPC_MAX_D: /*  OPC_RSQRT1_D */
> +        if (ctx->insn_flags & ISA_MIPS32R6) {
> +            /* OPC_MAX_D */
>               TCGv_i64 fp0 = tcg_temp_new_i64();
>               TCGv_i64 fp1 = tcg_temp_new_i64();
> +            gen_load_fpr64(ctx, fp0, fs);
> +            gen_load_fpr64(ctx, fp1, ft);
> +            gen_helper_float_max_d(fp1, cpu_env, fp0, fp1);
> +            gen_store_fpr64(ctx, fp1, fd);
> +            tcg_temp_free_i64(fp1);
> +            tcg_temp_free_i64(fp0);
> +            opn = "max.s";
This should be "max.d" :)
> +        } else {
> +            /* OPC_RSQRT1_D */
> +            check_cp1_64bitmode(ctx);
> +            {
> +                TCGv_i64 fp0 = tcg_temp_new_i64();
>   
> +                gen_load_fpr64(ctx, fp0, fs);
> +                gen_helper_float_rsqrt1_d(fp0, cpu_env, fp0);
> +                gen_store_fpr64(ctx, fp0, fd);
> +                tcg_temp_free_i64(fp0);
> +            }
> +            opn = "rsqrt1.d";
> +        }
> +        break;
> +    case OPC_MAXA_D: /* OPC_RSQRT2_D */
> +        if (ctx->insn_flags & ISA_MIPS32R6) {
> +            /* OPC_MAXA_D */
> +            TCGv_i64 fp0 = tcg_temp_new_i64();
> +            TCGv_i64 fp1 = tcg_temp_new_i64();
>               gen_load_fpr64(ctx, fp0, fs);
>               gen_load_fpr64(ctx, fp1, ft);
> -            gen_helper_float_rsqrt2_d(fp0, cpu_env, fp0, fp1);
> +            gen_helper_float_maxa_d(fp1, cpu_env, fp0, fp1);
> +            gen_store_fpr64(ctx, fp1, fd);
>               tcg_temp_free_i64(fp1);
> -            gen_store_fpr64(ctx, fp0, fd);
>               tcg_temp_free_i64(fp0);
> +            opn = "maxa.d";
> +        } else {
> +            /* OPC_RSQRT2_D */
> +            check_cp1_64bitmode(ctx);
> +            {
> +                TCGv_i64 fp0 = tcg_temp_new_i64();
> +                TCGv_i64 fp1 = tcg_temp_new_i64();
> +
> +                gen_load_fpr64(ctx, fp0, fs);
> +                gen_load_fpr64(ctx, fp1, ft);
> +                gen_helper_float_rsqrt2_d(fp0, cpu_env, fp0, fp1);
> +                tcg_temp_free_i64(fp1);
> +                gen_store_fpr64(ctx, fp0, fd);
> +                tcg_temp_free_i64(fp0);
> +            }
> +            opn = "rsqrt2.d";
>           }
> -        opn = "rsqrt2.d";
>           break;
>       case OPC_CMP_F_D:
>       case OPC_CMP_UN_D: