Re: [PATCH 07/20] target/loongarch: Add fixed point arithmetic instruction translation - Philippe Mathieu-Daudé

From: "Philippe Mathieu-Daudé" <f4bug@amsat.org>
To: Song Gao <gaosong@loongson.cn>, qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org, thuth@redhat.com,
	richard.henderson@linaro.org, laurent@vivier.eu,
	maobibo@loongson.cn, alistair.francis@wdc.com,
	pbonzini@redhat.com
Subject: Re: [PATCH 07/20] target/loongarch: Add fixed point arithmetic instruction translation
Date: Thu, 1 Jul 2021 22:31:01 +0200	[thread overview]
Message-ID: <248d54fc-ebdd-09e9-22c2-e66307b22705@amsat.org> (raw)
In-Reply-To: <1624881885-31692-8-git-send-email-gaosong@loongson.cn>

On 6/28/21 2:04 PM, Song Gao wrote:
> This patch implements fixed point arithmetic instruction translation.
> 
> This includes:
> - ADD.{W/D}, SUB.{W/D}
> - ADDI.{W/D}, ADDU16I.D
> - ALSL.{W[U]/D}
> - LU12I.W, LU32I.D, LU52I.D
> - SLT[U], SLT[U]I
> - PCADDI, PCADDU12I, PCADDU18I, PCALAU12I
> - AND, OR, NOR, XOR, ANDN, ORN
> - MUL.{W/D}, MULH.{W[U]/D[U]}
> - MULW.D.W[U]
> - DIV.{W[U]/D[U]}, MOD.{W[U]/D[U]}
> - ANDI, ORI, XORI
> 
> Signed-off-by: Song Gao <gaosong@loongson.cn>
> ---
>  target/loongarch/insns.decode |  89 ++++++++
>  target/loongarch/instmap.h    |  53 +++++
>  target/loongarch/trans.inc.c  | 367 +++++++++++++++++++++++++++++++++
>  target/loongarch/translate.c  | 458 ++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 967 insertions(+)
>  create mode 100644 target/loongarch/insns.decode
>  create mode 100644 target/loongarch/instmap.h
>  create mode 100644 target/loongarch/trans.inc.c

> diff --git a/target/loongarch/instmap.h b/target/loongarch/instmap.h
> new file mode 100644
> index 0000000..8844333
> --- /dev/null
> +++ b/target/loongarch/instmap.h
> @@ -0,0 +1,53 @@
> +/*
> + * LoongArch emulation for qemu: instruction opcode
> + *
> + * Copyright (c) 2021 Loongson Technology Corporation Limited
> + *
> + * SPDX-License-Identifier: LGPL-2.1+
> + */
> +
> +#ifndef TARGET_LOONGARCH_INSTMAP_H
> +#define TARGET_LOONGARCH_INSTMAP_H
> +
> +/*
> + * fixed point opcodes
> + *
> + * Each constant is an opcode number pre-shifted left: by 15 for the
> + * main group and by 17 for the two ALSL entries.  The MUL, MULH, DIV
> + * and MOD entries are the @opc selectors switched on by
> + * gen_loongarch_muldiv().
> + * NOTE(review): presumably the shift places the number at its position
> + * in the 32-bit instruction word - confirm against the ISA manual.
> + */
> +enum {
> +    LA_OPC_ADD_W     = (0x00020 << 15),
> +    LA_OPC_ADD_D     = (0x00021 << 15),
> +    LA_OPC_SUB_W     = (0x00022 << 15),
> +    LA_OPC_SUB_D     = (0x00023 << 15),
> +    LA_OPC_SLT       = (0x00024 << 15),
> +    LA_OPC_SLTU      = (0x00025 << 15),
> +    LA_OPC_NOR       = (0x00028 << 15),
> +    LA_OPC_AND       = (0x00029 << 15),
> +    LA_OPC_OR        = (0x0002A << 15),
> +    LA_OPC_XOR       = (0x0002B << 15),
> +    LA_OPC_MUL_W     = (0x00038 << 15),
> +    LA_OPC_MULH_W    = (0x00039 << 15),
> +    LA_OPC_MULH_WU   = (0x0003A << 15),
> +    LA_OPC_MUL_D     = (0x0003B << 15),
> +    LA_OPC_MULH_D    = (0x0003C << 15),
> +    LA_OPC_MULH_DU   = (0x0003D << 15),
> +    LA_OPC_DIV_W     = (0x00040 << 15),
> +    LA_OPC_MOD_W     = (0x00041 << 15),
> +    LA_OPC_DIV_WU    = (0x00042 << 15),
> +    LA_OPC_MOD_WU    = (0x00043 << 15),
> +    LA_OPC_DIV_D     = (0x00044 << 15),
> +    LA_OPC_MOD_D     = (0x00045 << 15),
> +    LA_OPC_DIV_DU    = (0x00046 << 15),
> +    LA_OPC_MOD_DU    = (0x00047 << 15),
> +
> +    LA_OPC_ALSL_W    = (0x0002 << 17),
> +    LA_OPC_ALSL_D    = (0x0016 << 17)
> +
> +};
> +
> +/*
> + * 12 bit immediate opcodes
> + *
> + * Each constant is an opcode number pre-shifted left by 22.
> + * NOTE(review): presumably this leaves the low 22 bits for the 12-bit
> + * immediate and two 5-bit register fields - confirm against the ISA
> + * manual.
> + */
> +enum {
> +    LA_OPC_SLTI      = (0x008 << 22),
> +    LA_OPC_SLTIU     = (0x009 << 22),
> +    LA_OPC_ADDI_W    = (0x00A << 22),
> +    LA_OPC_ADDI_D    = (0x00B << 22),
> +    LA_OPC_ANDI      = (0x00D << 22),
> +    LA_OPC_ORI       = (0x00E << 22),
> +    LA_OPC_XORI      = (0x00F << 22)
> +};

Opcode definitions shouldn't be needed when using decodetree.

> +/*
> + * loongarch mul and div
> + *
> + * Emit TCG ops for the multiply, divide or modulo operation selected by
> + * @opc (an LA_OPC_MUL_x, LA_OPC_MULH_x, LA_OPC_DIV_x or LA_OPC_MOD_x
> + * constant).  GPR @rj is loaded into t0 and GPR @rk into t1; the result
> + * is written to cpu_gpr[@rd].
> + *
> + * Nothing is emitted when @rd is 0.  An @opc that matches no case
> + * raises EXCP_INE through generate_exception_end().
> + */
> +static void gen_loongarch_muldiv(DisasContext *ctx, int opc, int rd,
> +                                 int rj, int rk)
> +{
> +    TCGv t0, t1;
> +
> +    if (rd == 0) {
> +        /* Treat as NOP. */
> +        return;
> +    }
> +
> +    t0 = tcg_temp_new();
> +    t1 = tcg_temp_new();
> +
> +    gen_load_gpr(t0, rj);
> +    gen_load_gpr(t1, rk);
> +
> +    switch (opc) {
> +    /*
> +     * Signed 32-bit divide.  Both operands are sign-extended from 32 bits.
> +     * t2 = (t0 == INT_MIN && t1 == -1) || (t1 == 0); when t2 is non-zero
> +     * the divisor t1 is replaced by t2 before dividing.  The quotient is
> +     * sign-extended from 32 bits into rd.
> +     * NOTE(review): presumably this keeps the host division from
> +     * overflowing or dividing by zero - confirm.
> +     */
> +    case LA_OPC_DIV_W:
> +        {
> +            TCGv t2 = tcg_temp_new();
> +            TCGv t3 = tcg_temp_new();
> +            tcg_gen_ext32s_tl(t0, t0);
> +            tcg_gen_ext32s_tl(t1, t1);
> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t2, t0, INT_MIN);
> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, -1);
> +            tcg_gen_and_tl(t2, t2, t3);
> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, 0);
> +            tcg_gen_or_tl(t2, t2, t3);
> +            tcg_gen_movi_tl(t3, 0);
> +            tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1);
> +            tcg_gen_div_tl(cpu_gpr[rd], t0, t1);
> +            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
> +            tcg_temp_free(t3);
> +            tcg_temp_free(t2);
> +        }
> +        break;
> +    /*
> +     * Signed 32-bit remainder: same operand extension and divisor
> +     * replacement as LA_OPC_DIV_W, but emits tcg_gen_rem_tl.
> +     */
> +    case LA_OPC_MOD_W:
> +        {
> +            TCGv t2 = tcg_temp_new();
> +            TCGv t3 = tcg_temp_new();
> +            tcg_gen_ext32s_tl(t0, t0);
> +            tcg_gen_ext32s_tl(t1, t1);
> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t2, t0, INT_MIN);
> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, -1);
> +            tcg_gen_and_tl(t2, t2, t3);
> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, 0);
> +            tcg_gen_or_tl(t2, t2, t3);
> +            tcg_gen_movi_tl(t3, 0);
> +            tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1);
> +            tcg_gen_rem_tl(cpu_gpr[rd], t0, t1);
> +            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
> +            tcg_temp_free(t3);
> +            tcg_temp_free(t2);
> +        }
> +        break;
> +    /*
> +     * Unsigned 32-bit divide.  Both operands are zero-extended from
> +     * 32 bits; a divisor equal to t2 (0) is replaced by t3 (1).  The
> +     * quotient is then sign-extended from 32 bits into rd.
> +     */
> +    case LA_OPC_DIV_WU:
> +        {
> +            TCGv t2 = tcg_const_tl(0);
> +            TCGv t3 = tcg_const_tl(1);
> +            tcg_gen_ext32u_tl(t0, t0);
> +            tcg_gen_ext32u_tl(t1, t1);
> +            tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1);
> +            tcg_gen_divu_tl(cpu_gpr[rd], t0, t1);
> +            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
> +            tcg_temp_free(t3);
> +            tcg_temp_free(t2);
> +        }
> +        break;
> +    /*
> +     * Unsigned 32-bit remainder: same operand handling as
> +     * LA_OPC_DIV_WU, but emits tcg_gen_remu_tl.
> +     */
> +    case LA_OPC_MOD_WU:
> +        {
> +            TCGv t2 = tcg_const_tl(0);
> +            TCGv t3 = tcg_const_tl(1);
> +            tcg_gen_ext32u_tl(t0, t0);
> +            tcg_gen_ext32u_tl(t1, t1);
> +            tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1);
> +            tcg_gen_remu_tl(cpu_gpr[rd], t0, t1);
> +            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
> +            tcg_temp_free(t3);
> +            tcg_temp_free(t2);
> +        }
> +        break;
> +    /*
> +     * 32-bit multiply: operands are truncated to i32, multiplied as
> +     * i32, and the product is widened into rd with tcg_gen_ext_i32_tl.
> +     */
> +    case LA_OPC_MUL_W:
> +        {
> +            TCGv_i32 t2 = tcg_temp_new_i32();
> +            TCGv_i32 t3 = tcg_temp_new_i32();
> +            tcg_gen_trunc_tl_i32(t2, t0);
> +            tcg_gen_trunc_tl_i32(t3, t1);
> +            tcg_gen_mul_i32(t2, t2, t3);
> +            tcg_gen_ext_i32_tl(cpu_gpr[rd], t2);
> +            tcg_temp_free_i32(t2);
> +            tcg_temp_free_i32(t3);
> +        }
> +        break;
> +    /*
> +     * Signed 32x32 multiply, high word: tcg_gen_muls2_i32 leaves the
> +     * low half in t2 and the high half in t3; only t3 is widened into rd.
> +     */
> +    case LA_OPC_MULH_W:
> +        {
> +            TCGv_i32 t2 = tcg_temp_new_i32();
> +            TCGv_i32 t3 = tcg_temp_new_i32();
> +            tcg_gen_trunc_tl_i32(t2, t0);
> +            tcg_gen_trunc_tl_i32(t3, t1);
> +            tcg_gen_muls2_i32(t2, t3, t2, t3);
> +            tcg_gen_ext_i32_tl(cpu_gpr[rd], t3);
> +            tcg_temp_free_i32(t2);
> +            tcg_temp_free_i32(t3);
> +        }
> +        break;
> +    /*
> +     * Unsigned 32x32 multiply, high word: as LA_OPC_MULH_W but using
> +     * tcg_gen_mulu2_i32.  The high half is still widened into rd with
> +     * tcg_gen_ext_i32_tl.
> +     */
> +    case LA_OPC_MULH_WU:
> +        {
> +            TCGv_i32 t2 = tcg_temp_new_i32();
> +            TCGv_i32 t3 = tcg_temp_new_i32();
> +            tcg_gen_trunc_tl_i32(t2, t0);
> +            tcg_gen_trunc_tl_i32(t3, t1);
> +            tcg_gen_mulu2_i32(t2, t3, t2, t3);
> +            tcg_gen_ext_i32_tl(cpu_gpr[rd], t3);
> +            tcg_temp_free_i32(t2);
> +            tcg_temp_free_i32(t3);
> +        }
> +        break;
> +    /*
> +     * Signed full-width divide.  t2 = (t0 == -1LL << 63 && t1 == -1)
> +     * || (t1 == 0); when t2 is non-zero the divisor t1 is replaced by
> +     * t2 before dividing.
> +     * NOTE(review): -1LL << 63 left-shifts a negative value, which ISO C
> +     * leaves undefined; confirm the build flags make this well defined
> +     * or use INT64_MIN.
> +     */
> +    case LA_OPC_DIV_D:
> +        {
> +            TCGv t2 = tcg_temp_new();
> +            TCGv t3 = tcg_temp_new();
> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t2, t0, -1LL << 63);
> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, -1LL);
> +            tcg_gen_and_tl(t2, t2, t3);
> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, 0);
> +            tcg_gen_or_tl(t2, t2, t3);
> +            tcg_gen_movi_tl(t3, 0);
> +            tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1);
> +            tcg_gen_div_tl(cpu_gpr[rd], t0, t1);
> +            tcg_temp_free(t3);
> +            tcg_temp_free(t2);
> +        }
> +        break;
> +    /*
> +     * Signed full-width remainder: same divisor replacement as
> +     * LA_OPC_DIV_D, but emits tcg_gen_rem_tl.
> +     */
> +    case LA_OPC_MOD_D:
> +        {
> +            TCGv t2 = tcg_temp_new();
> +            TCGv t3 = tcg_temp_new();
> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t2, t0, -1LL << 63);
> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, -1LL);
> +            tcg_gen_and_tl(t2, t2, t3);
> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, 0);
> +            tcg_gen_or_tl(t2, t2, t3);
> +            tcg_gen_movi_tl(t3, 0);
> +            tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1);
> +            tcg_gen_rem_tl(cpu_gpr[rd], t0, t1);
> +            tcg_temp_free(t3);
> +            tcg_temp_free(t2);
> +        }
> +        break;
> +    /*
> +     * Unsigned full-width divide: a divisor equal to t2 (0) is replaced
> +     * by t3 (1) before dividing.
> +     * NOTE(review): this and the remaining 64-bit cases call _i64 ops on
> +     * TCGv values while the cases above use _tl ops; confirm TCGv is
> +     * always 64 bits wide for this target.
> +     */
> +    case LA_OPC_DIV_DU:
> +        {
> +            TCGv t2 = tcg_const_tl(0);
> +            TCGv t3 = tcg_const_tl(1);
> +            tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1);
> +            tcg_gen_divu_i64(cpu_gpr[rd], t0, t1);
> +            tcg_temp_free(t3);
> +            tcg_temp_free(t2);
> +        }
> +        break;
> +    /*
> +     * Unsigned full-width remainder: same divisor replacement as
> +     * LA_OPC_DIV_DU, but emits tcg_gen_remu_i64.
> +     */
> +    case LA_OPC_MOD_DU:
> +        {
> +            TCGv t2 = tcg_const_tl(0);
> +            TCGv t3 = tcg_const_tl(1);
> +            tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1);
> +            tcg_gen_remu_i64(cpu_gpr[rd], t0, t1);
> +            tcg_temp_free(t3);
> +            tcg_temp_free(t2);
> +        }
> +        break;
> +    /* Full-width multiply of t0 and t1 written straight into rd. */
> +    case LA_OPC_MUL_D:
> +        tcg_gen_mul_i64(cpu_gpr[rd], t0, t1);
> +        break;
> +    /*
> +     * Signed 64x64 multiply, high word: the low half goes to the
> +     * scratch temp t2 and is discarded; the high half goes to rd.
> +     */
> +    case LA_OPC_MULH_D:
> +        {
> +            TCGv t2 = tcg_temp_new();
> +            tcg_gen_muls2_i64(t2, cpu_gpr[rd], t0, t1);
> +            tcg_temp_free(t2);
> +        }
> +        break;
> +    /*
> +     * Unsigned 64x64 multiply, high word: as LA_OPC_MULH_D but using
> +     * tcg_gen_mulu2_i64.
> +     */
> +    case LA_OPC_MULH_DU:
> +        {
> +            TCGv t2 = tcg_temp_new();
> +            tcg_gen_mulu2_i64(t2, cpu_gpr[rd], t0, t1);
> +            tcg_temp_free(t2);
> +        }
> +        break;
> +    /* Any other selector: raise EXCP_INE, then fall into the cleanup. */
> +    default:
> +        generate_exception_end(ctx, EXCP_INE);
> +        goto out;
> +    }
> +    /* Common exit: release the two operand temporaries. */
> + out:
> +    tcg_temp_free(t0);
> +    tcg_temp_free(t1);
> +
> +    return;
> +}

> +/*
> + * Handler for MUL.W: emits the LA_OPC_MUL_W case of
> + * gen_loongarch_muldiv() on a->rd, a->rj, a->rk.  Always returns true.
> + */
> +static bool trans_mul_w(DisasContext *ctx, arg_mul_w *a)
> +{
> +    gen_loongarch_muldiv(ctx, LA_OPC_MUL_W, a->rd, a->rj, a->rk);
> +    return true;
> +}
> +
> +/*
> + * Handler for MULH.W: emits the LA_OPC_MULH_W case of
> + * gen_loongarch_muldiv() on a->rd, a->rj, a->rk.  Always returns true.
> + */
> +static bool trans_mulh_w(DisasContext *ctx, arg_mulh_w *a)
> +{
> +    gen_loongarch_muldiv(ctx, LA_OPC_MULH_W, a->rd, a->rj, a->rk);
> +    return true;
> +}
> +
> +/*
> + * Handler for MULH.WU: emits the LA_OPC_MULH_WU case of
> + * gen_loongarch_muldiv() on a->rd, a->rj, a->rk.  Always returns true.
> + */
> +static bool trans_mulh_wu(DisasContext *ctx, arg_mulh_wu *a)
> +{
> +    gen_loongarch_muldiv(ctx, LA_OPC_MULH_WU, a->rd, a->rj, a->rk);
> +    return true;
> +}
> +
> +/*
> + * Handler for MUL.D: calls check_loongarch_64(), then emits the
> + * LA_OPC_MUL_D case of gen_loongarch_muldiv() on a->rd, a->rj, a->rk.
> + * Always returns true.
> + * NOTE(review): check_loongarch_64() is defined elsewhere and no result
> + * is tested here, so translation continues whatever it does - confirm
> + * that is intended.
> + */
> +static bool trans_mul_d(DisasContext *ctx, arg_mul_d *a)
> +{
> +    check_loongarch_64(ctx);
> +    gen_loongarch_muldiv(ctx, LA_OPC_MUL_D, a->rd, a->rj, a->rk);
> +    return true;
> +}
> +
> +/*
> + * Handler for MULH.D: calls check_loongarch_64(), then emits the
> + * LA_OPC_MULH_D case of gen_loongarch_muldiv() on a->rd, a->rj, a->rk.
> + * Always returns true.
> + * NOTE(review): check_loongarch_64() is defined elsewhere and no result
> + * is tested here, so translation continues whatever it does - confirm
> + * that is intended.
> + */
> +static bool trans_mulh_d(DisasContext *ctx, arg_mulh_d *a)
> +{
> +    check_loongarch_64(ctx);
> +    gen_loongarch_muldiv(ctx, LA_OPC_MULH_D, a->rd, a->rj, a->rk);
> +    return true;
> +}
> +
> +/*
> + * Handler for MULH.DU: calls check_loongarch_64(), then emits the
> + * LA_OPC_MULH_DU case of gen_loongarch_muldiv() on a->rd, a->rj, a->rk.
> + * Always returns true.
> + * NOTE(review): check_loongarch_64() is defined elsewhere and no result
> + * is tested here, so translation continues whatever it does - confirm
> + * that is intended.
> + */
> +static bool trans_mulh_du(DisasContext *ctx, arg_mulh_du *a)
> +{
> +    check_loongarch_64(ctx);
> +    gen_loongarch_muldiv(ctx, LA_OPC_MULH_DU, a->rd, a->rj, a->rk);
> +    return true;
> +}

> +/*
> + * Handler for DIV.W: emits the LA_OPC_DIV_W case of
> + * gen_loongarch_muldiv() on a->rd, a->rj, a->rk.  Always returns true.
> + */
> +static bool trans_div_w(DisasContext *ctx, arg_div_w *a)
> +{
> +    gen_loongarch_muldiv(ctx, LA_OPC_DIV_W, a->rd, a->rj, a->rk);
> +    return true;
> +}
> +
> +/*
> + * Handler for MOD.W: emits the LA_OPC_MOD_W case of
> + * gen_loongarch_muldiv() on a->rd, a->rj, a->rk.  Always returns true.
> + */
> +static bool trans_mod_w(DisasContext *ctx, arg_mod_w *a)
> +{
> +    gen_loongarch_muldiv(ctx, LA_OPC_MOD_W, a->rd, a->rj, a->rk);
> +    return true;
> +}
> +
> +/*
> + * Handler for DIV.WU: emits the LA_OPC_DIV_WU case of
> + * gen_loongarch_muldiv() on a->rd, a->rj, a->rk.  Always returns true.
> + */
> +static bool trans_div_wu(DisasContext *ctx, arg_div_wu *a)
> +{
> +    gen_loongarch_muldiv(ctx, LA_OPC_DIV_WU, a->rd, a->rj, a->rk);
> +    return true;
> +}
> +
> +/*
> + * Handler for MOD.WU: emits the LA_OPC_MOD_WU case of
> + * gen_loongarch_muldiv() on a->rd, a->rj, a->rk.  Always returns true.
> + */
> +static bool trans_mod_wu(DisasContext *ctx, arg_mod_wu *a)
> +{
> +    gen_loongarch_muldiv(ctx, LA_OPC_MOD_WU, a->rd, a->rj, a->rk);
> +    return true;
> +}
> +
> +/*
> + * Handler for DIV.D: calls check_loongarch_64(), then emits the
> + * LA_OPC_DIV_D case of gen_loongarch_muldiv() on a->rd, a->rj, a->rk.
> + * Always returns true.
> + * NOTE(review): check_loongarch_64() is defined elsewhere and no result
> + * is tested here, so translation continues whatever it does - confirm
> + * that is intended.
> + */
> +static bool trans_div_d(DisasContext *ctx, arg_div_d *a)
> +{
> +    check_loongarch_64(ctx);
> +    gen_loongarch_muldiv(ctx, LA_OPC_DIV_D, a->rd, a->rj, a->rk);
> +    return true;
> +}
> +
> +/*
> + * Handler for MOD.D: calls check_loongarch_64(), then emits the
> + * LA_OPC_MOD_D case of gen_loongarch_muldiv() on a->rd, a->rj, a->rk.
> + * Always returns true.
> + * NOTE(review): check_loongarch_64() is defined elsewhere and no result
> + * is tested here, so translation continues whatever it does - confirm
> + * that is intended.
> + */
> +static bool trans_mod_d(DisasContext *ctx, arg_mod_d *a)
> +{
> +    check_loongarch_64(ctx);
> +    gen_loongarch_muldiv(ctx, LA_OPC_MOD_D, a->rd, a->rj, a->rk);
> +    return true;
> +}
> +
> +/*
> + * Handler for DIV.DU: calls check_loongarch_64(), then emits the
> + * LA_OPC_DIV_DU case of gen_loongarch_muldiv() on a->rd, a->rj, a->rk.
> + * Always returns true.
> + * NOTE(review): check_loongarch_64() is defined elsewhere and no result
> + * is tested here, so translation continues whatever it does - confirm
> + * that is intended.
> + */
> +static bool trans_div_du(DisasContext *ctx, arg_div_du *a)
> +{
> +    check_loongarch_64(ctx);
> +    gen_loongarch_muldiv(ctx, LA_OPC_DIV_DU, a->rd, a->rj, a->rk);
> +    return true;
> +}
> +
> +/*
> + * Handler for MOD.DU: calls check_loongarch_64(), then emits the
> + * LA_OPC_MOD_DU case of gen_loongarch_muldiv() on a->rd, a->rj, a->rk.
> + * Always returns true.
> + * NOTE(review): check_loongarch_64() is defined elsewhere and no result
> + * is tested here, so translation continues whatever it does - confirm
> + * that is intended.
> + */
> +static bool trans_mod_du(DisasContext *ctx, arg_mod_du *a)
> +{
> +    check_loongarch_64(ctx);
> +    gen_loongarch_muldiv(ctx, LA_OPC_MOD_DU, a->rd, a->rj, a->rk);
> +    return true;
> +}

It seems you are missing what decodetree is for... You should inline
each opcode's code from gen_loongarch_muldiv() into its own trans_*() handler.

Don't take MIPS as an example =)