All of lore.kernel.org
 help / color / mirror / Atom feed
From: Anton Blanchard <anton@samba.org>
To: David Gibson <david@gibson.dropbear.id.au>
Cc: peter.maydell@linaro.org, Richard Henderson <rth@twiddle.net>,
	qemu-ppc@nongnu.org, agraf@suse.de, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PULL 03/13] target-ppc: Use 32-bit rotate instead of deposit + 64-bit rotate
Date: Wed, 15 Jun 2016 22:17:19 +1000	[thread overview]
Message-ID: <20160615221719.12f246dd@kryten> (raw)
In-Reply-To: <1464318298-2456-4-git-send-email-david@gibson.dropbear.id.au>

Hi,

> From: Richard Henderson <rth@twiddle.net>
> 
> A 32-bit rotate insn is more common on hosts than a deposit insn,
> and if the host has neither the result is truly horrific.
> 
> At the same time, tidy up the temporaries within these functions,
> drop the over-use of "likely", drop some checks for identity that
> will also be checked by tcg-op.c functions, and special case mask
> without rotate within rlwinm.

This breaks masks that wrap:

        li      r3,-1
        li      r4,-1
        rlwnm   r3,r3,r4,22,8

We expect:

ffffffffff8003ff

But get:

ff8003ff

Anton

> Signed-off-by: Richard Henderson <rth@twiddle.net>
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  target-ppc/translate.c | 172
> ++++++++++++++++++++----------------------------- 1 file changed, 70
> insertions(+), 102 deletions(-)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 3ea6625..b392ecc 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -1610,141 +1610,109 @@ static void gen_cntlzd(DisasContext *ctx)
>  /* rlwimi & rlwimi. */
>  static void gen_rlwimi(DisasContext *ctx)
>  {
> -    uint32_t mb, me, sh;
> -
> -    mb = MB(ctx->opcode);
> -    me = ME(ctx->opcode);
> -    sh = SH(ctx->opcode);
> -    if (likely(sh == (31-me) && mb <= me)) {
> -        tcg_gen_deposit_tl(cpu_gpr[rA(ctx->opcode)],
> cpu_gpr[rA(ctx->opcode)],
> -                           cpu_gpr[rS(ctx->opcode)], sh, me - mb +
> 1);
> +    TCGv t_ra = cpu_gpr[rA(ctx->opcode)];
> +    TCGv t_rs = cpu_gpr[rS(ctx->opcode)];
> +    uint32_t sh = SH(ctx->opcode);
> +    uint32_t mb = MB(ctx->opcode);
> +    uint32_t me = ME(ctx->opcode);
> +
> +    if (sh == (31-me) && mb <= me) {
> +        tcg_gen_deposit_tl(t_ra, t_ra, t_rs, sh, me - mb + 1);
>      } else {
>          target_ulong mask;
> +        TCGv_i32 t0;
>          TCGv t1;
> -        TCGv t0 = tcg_temp_new();
> -#if defined(TARGET_PPC64)
> -        tcg_gen_deposit_i64(t0, cpu_gpr[rS(ctx->opcode)],
> -            cpu_gpr[rS(ctx->opcode)], 32, 32);
> -        tcg_gen_rotli_i64(t0, t0, sh);
> -#else
> -        tcg_gen_rotli_i32(t0, cpu_gpr[rS(ctx->opcode)], sh);
> -#endif
> +
>  #if defined(TARGET_PPC64)
>          mb += 32;
>          me += 32;
>  #endif
>          mask = MASK(mb, me);
> +
> +        t0 = tcg_temp_new_i32();
>          t1 = tcg_temp_new();
> -        tcg_gen_andi_tl(t0, t0, mask);
> -        tcg_gen_andi_tl(t1, cpu_gpr[rA(ctx->opcode)], ~mask);
> -        tcg_gen_or_tl(cpu_gpr[rA(ctx->opcode)], t0, t1);
> -        tcg_temp_free(t0);
> +        tcg_gen_trunc_tl_i32(t0, t_rs);
> +        tcg_gen_rotli_i32(t0, t0, sh);
> +        tcg_gen_extu_i32_tl(t1, t0);
> +        tcg_temp_free_i32(t0);
> +
> +        tcg_gen_andi_tl(t1, t1, mask);
> +        tcg_gen_andi_tl(t_ra, t_ra, ~mask);
> +        tcg_gen_or_tl(t_ra, t_ra, t1);
>          tcg_temp_free(t1);
>      }
> -    if (unlikely(Rc(ctx->opcode) != 0))
> -        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
> +    if (unlikely(Rc(ctx->opcode) != 0)) {
> +        gen_set_Rc0(ctx, t_ra);
> +    }
>  }
>  
>  /* rlwinm & rlwinm. */
>  static void gen_rlwinm(DisasContext *ctx)
>  {
> -    uint32_t mb, me, sh;
> -
> -    sh = SH(ctx->opcode);
> -    mb = MB(ctx->opcode);
> -    me = ME(ctx->opcode);
> +    TCGv t_ra = cpu_gpr[rA(ctx->opcode)];
> +    TCGv t_rs = cpu_gpr[rS(ctx->opcode)];
> +    uint32_t sh = SH(ctx->opcode);
> +    uint32_t mb = MB(ctx->opcode);
> +    uint32_t me = ME(ctx->opcode);
>  
> -    if (likely(mb == 0 && me == (31 - sh))) {
> -        if (likely(sh == 0)) {
> -            tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)],
> cpu_gpr[rS(ctx->opcode)]);
> -        } else {
> -            TCGv t0 = tcg_temp_new();
> -            tcg_gen_ext32u_tl(t0, cpu_gpr[rS(ctx->opcode)]);
> -            tcg_gen_shli_tl(t0, t0, sh);
> -            tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], t0);
> -            tcg_temp_free(t0);
> -        }
> -    } else if (likely(sh != 0 && me == 31 && sh == (32 - mb))) {
> -        TCGv t0 = tcg_temp_new();
> -        tcg_gen_ext32u_tl(t0, cpu_gpr[rS(ctx->opcode)]);
> -        tcg_gen_shri_tl(t0, t0, mb);
> -        tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], t0);
> -        tcg_temp_free(t0);
> -    } else if (likely(mb == 0 && me == 31)) {
> -        TCGv_i32 t0 = tcg_temp_new_i32();
> -        tcg_gen_trunc_tl_i32(t0, cpu_gpr[rS(ctx->opcode)]);
> -        tcg_gen_rotli_i32(t0, t0, sh);
> -        tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t0);
> -        tcg_temp_free_i32(t0);
> +    if (mb == 0 && me == (31 - sh)) {
> +        tcg_gen_shli_tl(t_ra, t_rs, sh);
> +        tcg_gen_ext32u_tl(t_ra, t_ra);
> +    } else if (sh != 0 && me == 31 && sh == (32 - mb)) {
> +        tcg_gen_ext32u_tl(t_ra, t_rs);
> +        tcg_gen_shri_tl(t_ra, t_ra, mb);
>      } else {
> -        TCGv t0 = tcg_temp_new();
> -#if defined(TARGET_PPC64)
> -        tcg_gen_deposit_i64(t0, cpu_gpr[rS(ctx->opcode)],
> -            cpu_gpr[rS(ctx->opcode)], 32, 32);
> -        tcg_gen_rotli_i64(t0, t0, sh);
> -#else
> -        tcg_gen_rotli_i32(t0, cpu_gpr[rS(ctx->opcode)], sh);
> -#endif
>  #if defined(TARGET_PPC64)
>          mb += 32;
>          me += 32;
>  #endif
> -        tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], t0, MASK(mb, me));
> -        tcg_temp_free(t0);
> +        if (sh == 0) {
> +            tcg_gen_andi_tl(t_ra, t_rs, MASK(mb, me));
> +        } else {
> +            TCGv_i32 t0 = tcg_temp_new_i32();
> +
> +            tcg_gen_trunc_tl_i32(t0, t_rs);
> +            tcg_gen_rotli_i32(t0, t0, sh);
> +            tcg_gen_andi_i32(t0, t0, MASK(mb, me));
> +            tcg_gen_extu_i32_tl(t_ra, t0);
> +            tcg_temp_free_i32(t0);
> +        }
> +    }
> +    if (unlikely(Rc(ctx->opcode) != 0)) {
> +        gen_set_Rc0(ctx, t_ra);
>      }
> -    if (unlikely(Rc(ctx->opcode) != 0))
> -        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
>  }
>  
>  /* rlwnm & rlwnm. */
>  static void gen_rlwnm(DisasContext *ctx)
>  {
> -    uint32_t mb, me;
> -    mb = MB(ctx->opcode);
> -    me = ME(ctx->opcode);
> +    TCGv t_ra = cpu_gpr[rA(ctx->opcode)];
> +    TCGv t_rs = cpu_gpr[rS(ctx->opcode)];
> +    TCGv t_rb = cpu_gpr[rB(ctx->opcode)];
> +    uint32_t mb = MB(ctx->opcode);
> +    uint32_t me = ME(ctx->opcode);
> +    TCGv_i32 t0, t1;
>  
> -    if (likely(mb == 0 && me == 31)) {
> -        TCGv_i32 t0, t1;
> -        t0 = tcg_temp_new_i32();
> -        t1 = tcg_temp_new_i32();
> -        tcg_gen_trunc_tl_i32(t0, cpu_gpr[rB(ctx->opcode)]);
> -        tcg_gen_trunc_tl_i32(t1, cpu_gpr[rS(ctx->opcode)]);
> -        tcg_gen_andi_i32(t0, t0, 0x1f);
> -        tcg_gen_rotl_i32(t1, t1, t0);
> -        tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t1);
> -        tcg_temp_free_i32(t0);
> -        tcg_temp_free_i32(t1);
> -    } else {
> -        TCGv t0;
>  #if defined(TARGET_PPC64)
> -        TCGv t1;
> +    mb += 32;
> +    me += 32;
>  #endif
>  
> -        t0 = tcg_temp_new();
> -        tcg_gen_andi_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x1f);
> -#if defined(TARGET_PPC64)
> -        t1 = tcg_temp_new_i64();
> -        tcg_gen_deposit_i64(t1, cpu_gpr[rS(ctx->opcode)],
> -                            cpu_gpr[rS(ctx->opcode)], 32, 32);
> -        tcg_gen_rotl_i64(t0, t1, t0);
> -        tcg_temp_free_i64(t1);
> -#else
> -        tcg_gen_rotl_i32(t0, cpu_gpr[rS(ctx->opcode)], t0);
> -#endif
> -        if (unlikely(mb != 0 || me != 31)) {
> -#if defined(TARGET_PPC64)
> -            mb += 32;
> -            me += 32;
> -#endif
> -            tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], t0, MASK(mb,
> me));
> -        } else {
> -            tcg_gen_andi_tl(t0, t0, MASK(32, 63));
> -            tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], t0);
> -        }
> -        tcg_temp_free(t0);
> +    t0 = tcg_temp_new_i32();
> +    t1 = tcg_temp_new_i32();
> +    tcg_gen_trunc_tl_i32(t0, t_rb);
> +    tcg_gen_trunc_tl_i32(t1, t_rs);
> +    tcg_gen_andi_i32(t0, t0, 0x1f);
> +    tcg_gen_rotl_i32(t1, t1, t0);
> +    tcg_temp_free_i32(t0);
> +
> +    tcg_gen_andi_i32(t1, t1, MASK(mb, me));
> +    tcg_gen_extu_i32_tl(t_ra, t1);
> +    tcg_temp_free_i32(t1);
> +
> +    if (unlikely(Rc(ctx->opcode) != 0)) {
> +        gen_set_Rc0(ctx, t_ra);
>      }
> -    if (unlikely(Rc(ctx->opcode) != 0))
> -        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
>  }
>  
>  #if defined(TARGET_PPC64)

  reply	other threads:[~2016-06-15 12:17 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-05-27  3:04 [Qemu-devel] [PULL 00/13] ppc-for-2.7 queue 20160527 David Gibson
2016-05-27  3:04 ` [Qemu-devel] [PULL 01/13] target-ppc: Correct KVM synchronization for ppc_hash64_set_external_hpt() David Gibson
2016-05-27  3:04 ` [Qemu-devel] [PULL 02/13] target-ppc: Use movcond in isel David Gibson
2016-05-27  3:04 ` [Qemu-devel] [PULL 03/13] target-ppc: Use 32-bit rotate instead of deposit + 64-bit rotate David Gibson
2016-06-15 12:17   ` Anton Blanchard [this message]
2016-06-16  5:19     ` David Gibson
2016-06-16 19:04       ` Richard Henderson
2016-06-17 14:27         ` Anton Blanchard
2016-06-18  4:02           ` Anton Blanchard
2016-06-18  5:10             ` Richard Henderson
2016-06-20  8:21             ` Thomas Huth
2016-06-20  8:56               ` Peter Maydell
2016-06-20  9:08                 ` Thomas Huth
2016-05-27  3:04 ` [Qemu-devel] [PULL 04/13] target-ppc: Cleanups to rldinm, rldnm, rldimi David Gibson
2016-05-27  3:04 ` [Qemu-devel] [PULL 05/13] hw/net/spapr_llan: Delay flushing of the RX queue while adding new RX buffers David Gibson
2016-05-27  3:04 ` [Qemu-devel] [PULL 06/13] hw/net/spapr_llan: Provide counter with dropped rx frames to the guest David Gibson
2016-05-27  3:04 ` [Qemu-devel] [PULL 07/13] Added negative check for get_image_size() David Gibson
2016-05-27  3:04 ` [Qemu-devel] [PULL 08/13] PPC/KVM: early validation of vcpu id David Gibson
2016-05-27  3:04 ` [Qemu-devel] [PULL 09/13] spapr: ensure device trees are always associated with DRC David Gibson
2016-05-27  3:04 ` [Qemu-devel] [PULL 10/13] spapr_pci: Use correct DMA LIOBN when composing the device tree David Gibson
2016-05-27  3:04 ` [Qemu-devel] [PULL 11/13] spapr_iommu: Finish renaming vfio_accel to need_vfio David Gibson
2016-05-27  3:04 ` [Qemu-devel] [PULL 12/13] spapr_iommu: Move table allocation to helpers David Gibson
2016-05-27  3:04 ` [Qemu-devel] [PULL 13/13] MAINTAINERS: Add David Gibson as ppc maintainer David Gibson
2016-05-27  9:56 ` [Qemu-devel] [PULL 00/13] ppc-for-2.7 queue 20160527 Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160615221719.12f246dd@kryten \
    --to=anton@samba.org \
    --cc=agraf@suse.de \
    --cc=david@gibson.dropbear.id.au \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.