qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Laurent Desnogues <laurent.desnogues@gmail.com>
To: Richard Henderson <richard.henderson@linaro.org>
Cc: Peter Maydell <peter.maydell@linaro.org>,
	qemu-arm <qemu-arm@nongnu.org>,
	"qemu-devel@nongnu.org" <qemu-devel@nongnu.org>
Subject: Re: [Qemu-devel] [Qemu-arm] [PATCH 6/7] target/arm: Simplify SMMLA, SMMLAR, SMMLS, SMMLSR
Date: Wed, 28 Aug 2019 09:22:14 +0200	[thread overview]
Message-ID: <CABoDooPO37bEjDF77_mU6Z57boRzuYaDP8DcTqtAOJxCjvBkRw@mail.gmail.com> (raw)
In-Reply-To: <20190808202616.13782-7-richard.henderson@linaro.org>

Hi Richard,

On Thu, Aug 8, 2019 at 10:28 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> All of the inputs to these instructions are 32-bits.  Rather than
> extend each input to 64-bits and then extract the high 32-bits of
> the output, use tcg_gen_muls2_i32 and other 32-bit generator functions.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/translate.c | 72 +++++++++++++++---------------------------
>  1 file changed, 26 insertions(+), 46 deletions(-)
>
> diff --git a/target/arm/translate.c b/target/arm/translate.c
> index ddc54e77e4..77154be743 100644
> --- a/target/arm/translate.c
> +++ b/target/arm/translate.c
> @@ -391,34 +391,6 @@ static void gen_revsh(TCGv_i32 var)
>      tcg_gen_ext16s_i32(var, var);
>  }
>
> -/* Return (b << 32) + a. Mark inputs as dead */
> -static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv_i32 b)
> -{
> -    TCGv_i64 tmp64 = tcg_temp_new_i64();
> -
> -    tcg_gen_extu_i32_i64(tmp64, b);
> -    tcg_temp_free_i32(b);
> -    tcg_gen_shli_i64(tmp64, tmp64, 32);
> -    tcg_gen_add_i64(a, tmp64, a);
> -
> -    tcg_temp_free_i64(tmp64);
> -    return a;
> -}
> -
> -/* Return (b << 32) - a. Mark inputs as dead. */
> -static TCGv_i64 gen_subq_msw(TCGv_i64 a, TCGv_i32 b)
> -{
> -    TCGv_i64 tmp64 = tcg_temp_new_i64();
> -
> -    tcg_gen_extu_i32_i64(tmp64, b);
> -    tcg_temp_free_i32(b);
> -    tcg_gen_shli_i64(tmp64, tmp64, 32);
> -    tcg_gen_sub_i64(a, tmp64, a);
> -
> -    tcg_temp_free_i64(tmp64);
> -    return a;
> -}
> -
>  /* 32x32->64 multiply.  Marks inputs as dead.  */
>  static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
>  {
> @@ -8872,23 +8844,27 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
>                             (SMMUL, SMMLA, SMMLS) */
>                          tmp = load_reg(s, rm);
>                          tmp2 = load_reg(s, rs);
> -                        tmp64 = gen_muls_i64_i32(tmp, tmp2);
> +                        tcg_gen_muls2_i32(tmp2, tmp, tmp, tmp2);
>
>                          if (rd != 15) {
> -                            tmp = load_reg(s, rd);
> +                            tmp3 = load_reg(s, rd);
>                              if (insn & (1 << 6)) {
> -                                tmp64 = gen_subq_msw(tmp64, tmp);
> +                                tcg_gen_sub_i32(tmp, tmp, tmp3);

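Shouldn't you subtract tmp from tmp3 here (i.e. compute tmp3 - tmp)?
The removed gen_subq_msw returned (b << 32) - a, with the accumulator
as b and the product as a.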

>                              } else {
> -                                tmp64 = gen_addq_msw(tmp64, tmp);
> +                                tcg_gen_add_i32(tmp, tmp, tmp3);
>                              }
> +                            tcg_temp_free_i32(tmp3);
>                          }
>                          if (insn & (1 << 5)) {
> -                            tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
> +                            /*
> +                             * Adding 0x80000000 to the 64-bit quantity
> +                             * means that we have carry in to the high
> +                             * word when the low word has the high bit set.
> +                             */
> +                            tcg_gen_shri_i32(tmp2, tmp2, 31);
> +                            tcg_gen_add_i32(tmp, tmp, tmp2);
>                          }
> -                        tcg_gen_shri_i64(tmp64, tmp64, 32);
> -                        tmp = tcg_temp_new_i32();
> -                        tcg_gen_extrl_i64_i32(tmp, tmp64);
> -                        tcg_temp_free_i64(tmp64);
> +                        tcg_temp_free_i32(tmp2);
>                          store_reg(s, rn, tmp);
>                          break;
>                      case 0:
> @@ -10114,22 +10090,26 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
>                    }
>                  break;
>              case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
> -                tmp64 = gen_muls_i64_i32(tmp, tmp2);
> +                tcg_gen_muls2_i32(tmp2, tmp, tmp, tmp2);
>                  if (rs != 15) {
> -                    tmp = load_reg(s, rs);
> +                    tmp3 = load_reg(s, rs);
>                      if (insn & (1 << 20)) {
> -                        tmp64 = gen_addq_msw(tmp64, tmp);
> +                        tcg_gen_add_i32(tmp, tmp, tmp3);
>                      } else {
> -                        tmp64 = gen_subq_msw(tmp64, tmp);
> +                        tcg_gen_sub_i32(tmp, tmp, tmp3);

Same here.

Also, the way you do the computation means you don't propagate the
borrow from the lower 32 bits of the 64-bit product into the high
word when doing the subtraction.

Thanks,

Laurent

>                      }
> +                    tcg_temp_free_i32(tmp3);
>                  }
>                  if (insn & (1 << 4)) {
> -                    tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
> +                    /*
> +                     * Adding 0x80000000 to the 64-bit quantity
> +                     * means that we have carry in to the high
> +                     * word when the low word has the high bit set.
> +                     */
> +                    tcg_gen_shri_i32(tmp2, tmp2, 31);
> +                    tcg_gen_add_i32(tmp, tmp, tmp2);
>                  }
> -                tcg_gen_shri_i64(tmp64, tmp64, 32);
> -                tmp = tcg_temp_new_i32();
> -                tcg_gen_extrl_i64_i32(tmp, tmp64);
> -                tcg_temp_free_i64(tmp64);
> +                tcg_temp_free_i32(tmp2);
>                  break;
>              case 7: /* Unsigned sum of absolute differences.  */
>                  gen_helper_usad8(tmp, tmp, tmp2);
> --
> 2.17.1
>
>


  reply	other threads:[~2019-08-28  7:23 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-08 20:26 [Qemu-devel] [PATCH 0/7] target/arm: Misc cleanups Richard Henderson
2019-08-08 20:26 ` [Qemu-devel] [PATCH 1/7] target/arm: Use tcg_gen_extract_i32 for shifter_out_im Richard Henderson
2019-08-08 20:26 ` [Qemu-devel] [PATCH 2/7] target/arm: Use tcg_gen_deposit_i32 for PKHBT, PKHTB Richard Henderson
2019-08-08 20:26 ` [Qemu-devel] [PATCH 3/7] target/arm: Remove redundant shift tests Richard Henderson
2019-08-08 20:26 ` [Qemu-devel] [PATCH 4/7] target/arm: Use ror32 instead of open-coding the operation Richard Henderson
2019-08-08 20:26 ` [Qemu-devel] [PATCH 5/7] target/arm: Use tcg_gen_rotri_i32 for gen_swap_half Richard Henderson
2019-08-08 20:26 ` [Qemu-devel] [PATCH 6/7] target/arm: Simplify SMMLA, SMMLAR, SMMLS, SMMLSR Richard Henderson
2019-08-28  7:22   ` Laurent Desnogues [this message]
2019-08-08 20:26 ` [Qemu-devel] [PATCH 7/7] target/arm: Use tcg_gen_extrh_i64_i32 to extract the high word Richard Henderson
2019-08-15 10:16   ` Peter Maydell
2019-08-15 11:56     ` Richard Henderson
2019-08-15 12:02       ` Peter Maydell
2019-08-15 10:34 ` [Qemu-devel] [PATCH 0/7] target/arm: Misc cleanups Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CABoDooPO37bEjDF77_mU6Z57boRzuYaDP8DcTqtAOJxCjvBkRw@mail.gmail.com \
    --to=laurent.desnogues@gmail.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).