All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: "Chen Gang" <xili_gchen_5257@hotmail.com>,
	"Peter Maydell" <peter.maydell@linaro.org>,
	"Andreas Färber" <afaerber@suse.de>,
	"Chris Metcalf" <cmetcalf@ezchip.com>
Cc: "walt@tilera.com" <walt@tilera.com>,
	Riku Voipio <riku.voipio@iki.fi>,
	qemu-devel <qemu-devel@nongnu.org>
Subject: Re: [Qemu-devel] [PATCH 09/10 v10] target-tilegx: Generate tcg instructions to execute to _init_malloc in glib
Date: Mon, 11 May 2015 09:55:27 -0700	[thread overview]
Message-ID: <5550DEFF.8050204@twiddle.net> (raw)
In-Reply-To: <BLU436-SMTP63FD3A6167A47FDC4A3D91B9DC0@phx.gbl>

On 05/10/2015 03:45 PM, Chen Gang wrote:
> +static void gen_cmpltsi(struct DisasContext *dc,
> +                        uint8_t rdst, uint8_t rsrc, int8_t imm8)
> +{
> +    qemu_log_mask(CPU_LOG_TB_IN_ASM, "cmpltsi r%d, r%d, %d\n",
> +                  rdst, rsrc, imm8);
> +    tcg_gen_setcondi_i64(TCG_COND_LTU, dest_gr(dc, rdst), load_gr(dc, rsrc),
> +                         (int64_t)imm8);
> +}
> +
> +static void gen_cmpltui(struct DisasContext *dc,
> +                        uint8_t rdst, uint8_t rsrc, uint8_t imm8)
> +{
> +    qemu_log_mask(CPU_LOG_TB_IN_ASM, "cmpltui r%d, r%d, %d\n",
> +                  rdst, rsrc, imm8);
> +    tcg_gen_setcondi_i64(TCG_COND_LTU,
> +                         dest_gr(dc, rdst), load_gr(dc, rsrc), imm8);
> +}
> +
> +static void gen_cmpeqi(struct DisasContext *dc,
> +                       uint8_t rdst, uint8_t rsrc, uint8_t imm8)
> +{
> +    qemu_log_mask(CPU_LOG_TB_IN_ASM, "cmpeqi r%d, r%d, %d\n", rdst, rsrc, imm8);
> +    tcg_gen_setcondi_i64(TCG_COND_EQ,
> +                         dest_gr(dc, rdst), load_gr(dc, rsrc), imm8);
> +}

Merge these.

> +
> +static void gen_cmp(struct DisasContext *dc,
> +                    uint8_t rdst, uint8_t rsrc, uint8_t rsrcb, TCGCond cond)
> +{
> +    const char *prefix;
> +
> +    switch (cond) {
> +    case TCG_COND_EQ:
> +        prefix = "eq";
> +        break;
> +    case TCG_COND_LE:
> +        prefix = "les";
> +        break;
> +    case TCG_COND_LEU:
> +        prefix = "leu";
> +        break;
> +    case TCG_COND_LT:
> +        prefix = "lts";
> +        break;
> +    case TCG_COND_LTU:
> +        prefix = "ltu";
> +        break;
> +    case TCG_COND_NE:
> +        prefix = "ne";
> +        break;
> +    default:
> +        dc->exception = TILEGX_EXCP_OPCODE_UNKNOWN;
> +        return;
> +    }
> +
> +    qemu_log_mask(CPU_LOG_TB_IN_ASM, "cmp%s r%d, r%d, r%d\n",
> +                  prefix, rdst, rsrc, rsrcb);

Better to just pass down the opcode name with the TCGCond rather than trying to
recreate it.  Then there's no need for a switch, nor a need for a confusing
TILEGX_EXCP_OPCODE_UNKNOWN path.

> +static void gen_exch(struct DisasContext *dc,
> +                     uint8_t rdst, uint8_t rsrc, uint8_t rsrcb, int excp)
> +{
> +    const char *prefix, *width;
> +
> +    switch (excp) {
> +    case TILEGX_EXCP_OPCODE_EXCH4:
> +        prefix = "";
> +        width = "4";
> +        break;
> +    case TILEGX_EXCP_OPCODE_EXCH:
> +        prefix = "";
> +        width = "";
> +        break;
> +    case TILEGX_EXCP_OPCODE_CMPEXCH4:
> +        prefix = "cmp";
> +        width = "4";
> +        break;
> +    case TILEGX_EXCP_OPCODE_CMPEXCH:
> +        prefix = "cmp";
> +        width = "";
> +        break;
> +    default:
> +        dc->exception = TILEGX_EXCP_OPCODE_UNKNOWN;
> +        return;
> +    }

Likewise.

> +static void gen_v1cmpeqi(struct DisasContext *dc,
> +                         uint8_t rdst, uint8_t rsrc, uint8_t imm8)
> +{
> +    int count;
> +    TCGv vdst = dest_gr(dc, rdst);
> +    TCGv tmp = tcg_temp_new_i64();
> +
> +    qemu_log_mask(CPU_LOG_TB_IN_ASM, "v1cmpeqi r%d, r%d, %d\n",
> +                  rdst, rsrc, imm8);
> +
> +    tcg_gen_movi_i64(vdst, 0);
> +
> +    for (count = 0; count < 8; count++) {
> +        tcg_gen_shri_i64(tmp, load_gr(dc, rsrc), (8 - count - 1) * 8);
> +        tcg_gen_andi_i64(tmp, tmp, 0xff);
> +        tcg_gen_setcondi_i64(TCG_COND_EQ, tmp, tmp, imm8);
> +        tcg_gen_or_i64(vdst, vdst, tmp);
> +        tcg_gen_shli_i64(vdst, vdst, 8);

For all of these vector instructions, I would encourage you to use helpers to
extract and insert values.  Extraction has little choice but to use a shift and
a mask, as you use here.  But insertion can use tcg_gen_deposit_i64.  I think
that is a lot easier to reason with than your construction here which
sequentially shifts vdst.

E.g.

static inline void
extract_v1(TCGv out, TCGv in, unsigned byte)
{
  tcg_gen_shri_i64(out, in, byte * 8);
  tcg_gen_ext8u_i64(out, out);
}

static inline void
insert_v1(TCGv out, TCGv in, unsigned byte)
{
  tcg_gen_deposit_i64(out, out, in, byte * 8, 8);
}


This loop then becomes

	TCGv vsrc = load_gr(dc, src);
	for (count = 0; count < 8; ++count) {
	    extract_v1(tmp, vsrc, count);
	    tcg_gen_setcondi_i64(TCG_COND_EQ, tmp, tmp, imm8);
	    insert_v1(vdst, tmp, count);
	}

> +static void gen_v1int_l(struct DisasContext *dc,
> +                        uint8_t rdst, uint8_t rsrc, uint8_t rsrcb)
> +{
> +    int count;
> +    TCGv vdst = dest_gr(dc, rdst);
> +    TCGv tmp = tcg_temp_new_i64();
> +
> +    qemu_log_mask(CPU_LOG_TB_IN_ASM, "v1int_l r%d, r%d, r%d\n",
> +                  rdst, rsrc, rsrcb);
> +
> +    tcg_gen_movi_i64(vdst, 0);
> +    for (count = 0; count < 4; count++) {
> +
> +        tcg_gen_shli_i64(vdst, vdst, 8);
> +
> +        tcg_gen_shri_i64(tmp, load_gr(dc, rsrc), (4 - count - 1) * 8);
> +        tcg_gen_andi_i64(tmp, tmp, 0xff);
> +        tcg_gen_or_i64(vdst, vdst, tmp);
> +        tcg_gen_shli_i64(vdst, vdst, 8);
> +
> +        tcg_gen_shri_i64(tmp, load_gr(dc, rsrcb), (4 - count - 1) * 8);
> +        tcg_gen_andi_i64(tmp, tmp, 0xff);
> +        tcg_gen_or_i64(vdst, vdst, tmp);
> +    }
> +    tcg_temp_free_i64(tmp);

	TCGv vsrc = load_gr(dc, rsrc);
	TCGv vsrcb = load_gr(dc, rsrcb);

	for (count = 0; count < 4; ++count) {
	    extract_v1(tmp, vsrc, count);
	    insert_v1(vdst, tmp, count * 2);
	    extract_v1(tmp, vsrcb, count);
	    insert_v1(vdst, tmp, count * 2 + 1);
	}


> +}
> +
> +/*
> + * Functional Description
> + *
> + *        uint64_t output = 0;
> + *        uint32_t counter;
> + *        for (counter = 0; counter < (WORD_SIZE / 32); counter++)
> + *        {
> + *            bool asel = ((counter & 1) == 1);
> + *            int in_sel = 0 + counter / 2;
> + *            int32_t srca = get4Byte (rf[SrcA], in_sel);
> + *            int32_t srcb = get4Byte (rf[SrcB], in_sel);
> + *            output = set4Byte (output, counter, (asel ? srca : srcb));
> + *        }
> + *        rf[Dest] = output;
> +*/
> +
> +static void gen_v4int_l(struct DisasContext *dc,
> +                        uint8_t rdst, uint8_t rsrc, uint8_t rsrcb)
> +{
> +    TCGv vdst = dest_gr(dc, rdst);
> +    TCGv tmp = tcg_temp_new_i64();
> +
> +    qemu_log_mask(CPU_LOG_TB_IN_ASM, "v4int_l r%d, r%d, r%d\n",
> +                  rdst, rsrc, rsrcb);
> +
> +    tcg_gen_andi_i64(vdst, load_gr(dc, rsrc), 0xffffffff);
> +    tcg_gen_shli_i64(vdst, vdst, 8);
> +    tcg_gen_andi_i64(tmp, load_gr(dc, rsrcb), 0xffffffff);
> +    tcg_gen_or_i64(vdst, vdst, tmp);

And herein is a bug, that I'd hope using the helper functions would avoid: you
shift by 8 instead of 32.  This function simplifies to

	tcg_gen_deposit_i64(vdst, load_gr(dc, rsrc), load_gr(dc, rsrcb),
                            32, 32);

> +/*
> + * uint64_t mask = 0;
> + * int64_t background = ((rf[SrcA] >> BFEnd) & 1) ? -1ULL : 0ULL;
> + * mask = ((-1ULL) ^ ((-1ULL << ((BFEnd - BFStart) & 63)) << 1));
> + * uint64_t rot_src = (((uint64_t) rf[SrcA]) >> BFStart)
> + *                    | (rf[SrcA] << (64 - BFStart));
> + * rf[Dest] = (rot_src & mask) | (background & ~mask);
> + */
> +static void gen_bfexts(struct DisasContext *dc, uint8_t rdst, uint8_t rsrc,
> +                       uint8_t start, uint8_t end)
> +{
> +    uint64_t mask = (-1ULL) ^ ((-1ULL << ((end - start) & 63)) << 1);
> +    TCGv vldst = tcg_temp_local_new_i64();
> +    TCGv tmp = tcg_temp_local_new_i64();
> +    TCGv pmsk = tcg_const_i64(-1ULL);

Why the local temps?

> +
> +    qemu_log_mask(CPU_LOG_TB_IN_ASM, "bfexts r%d, r%d, %d, %d\n",
> +                  rdst, rsrc, start, end);
> +
> +    tcg_gen_rotri_i64(vldst, load_gr(dc, rsrc), start);
> +    tcg_gen_andi_i64(vldst, vldst, mask);
> +
> +    tcg_gen_shri_i64(tmp, load_gr(dc, rsrc), end);
> +    tcg_gen_andi_i64(tmp, tmp, 1);
> +    tcg_gen_movcond_i64(TCG_COND_EQ, tmp, tmp, load_zero(dc),
> +                        load_zero(dc), pmsk);

This movcond is equivalent to negation.

> +/*
> +   Functional Description
> +       uint64_t a = rf[SrcA];
> +       uint64_t b = rf[SrcB];
> +       uint64_t d = rf[Dest];
> +       uint64_t output = 0;
> +       unsigned int counter;
> +       for (counter = 0; counter < (WORD_SIZE / BYTE_SIZE); counter++)
> +       {
> +           int sel = getByte (b, counter) & 0xf;
> +           uint8_t byte = (sel < 8) ? getByte (d, sel) : getByte (a, (sel - 8));
> +           output = setByte (output, counter, byte);
> +       }
> +       rf[Dest] = output;
> + */
> +static void gen_shufflebytes(struct DisasContext *dc,
> +                             uint8_t rdst, uint8_t rsrc, uint8_t rsrcb)

I strongly suggest this be moved to op_helper.c.  It's too big.

> +/* FIXME: At present, skip unalignment checking */
> +static void gen_ld(struct DisasContext *dc,
> +                   uint8_t rdst, uint8_t rsrc, TCGMemOp ops)

Alignment checks would never be done here anyway.
Again, pass down the opcode string rather than rebuild.


r~

  reply	other threads:[~2015-05-11 16:55 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-05-10 22:36 [Qemu-devel] [PATCH 00/10 v10] tilegx: Firstly add tilegx target for linux-user Chen Gang
2015-05-10 22:38 ` [Qemu-devel] [PATCH 01/10 v10] linux-user: tilegx: Firstly add architecture related features Chen Gang
2015-05-10 22:39 ` [Qemu-devel] [PATCH 02/10 v10] linux-user: Support tilegx architecture in linux-user Chen Gang
2015-05-10 22:40 ` [Qemu-devel] [PATCH 03/10 v10] linux-user/syscall.c: Conditionalize syscalls which are not defined in tilegx Chen Gang
2015-05-10 22:41 ` [Qemu-devel] [PATCH 04/10 v10] target-tilegx: Add opcode basic implementation from Tilera Corporation Chen Gang
2015-05-10 22:43 ` [Qemu-devel] [PATCH 06/10 v10] target-tilegx: Add special register information " Chen Gang
2015-05-10 22:44 ` [Qemu-devel] [PATCH 07/10 v10] target-tilegx: Add cpu basic features for linux-user Chen Gang
2015-05-10 22:44 ` [Qemu-devel] [PATCH 08/10 v10] target-tilegx: Add helper " Chen Gang
2015-05-10 22:45 ` [Qemu-devel] [PATCH 09/10 v10] target-tilegx: Generate tcg instructions to execute to _init_malloc in glib Chen Gang
2015-05-11 16:55   ` Richard Henderson [this message]
2015-05-11 21:26     ` Chen Gang
2015-05-26 21:39       ` Chen Gang
2015-06-01 20:58         ` Chen Gang
2015-05-14 14:56     ` Chen Gang
2015-05-14 15:08     ` Chen Gang
2015-05-14 16:05     ` Chen Gang
2015-05-15  2:31       ` Chen Gang
2015-05-29 19:29     ` Chen Gang
2015-05-10 22:46 ` [Qemu-devel] [PATCH 10/10 v10] target-tilegx: Add TILE-Gx building files Chen Gang
     [not found] ` <BLU437-SMTP59B35F884A72A991334DBB9DC0@phx.gbl>
2015-05-11 16:01   ` [Qemu-devel] [PATCH 05/10 v10] target-tilegx/opcode_tilegx.h: Modify it to fit qemu using Richard Henderson
2015-05-11 21:06     ` Chen Gang
2015-05-11 22:06       ` Richard Henderson
2015-05-12  0:43         ` gchen gchen
2015-05-12 10:56           ` Chen Gang
2015-05-12 11:08             ` Peter Maydell
2015-05-12 11:16               ` Chen Gang
2015-05-19  2:47                 ` Chen Gang
2015-05-21 20:59                   ` Chen Gang
2015-05-21 23:40                     ` Chris Metcalf
2015-05-22  1:48                       ` Chen Gang
2015-05-24 22:03                         ` Chen Gang
2015-05-25 15:13                           ` Chen Gang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5550DEFF.8050204@twiddle.net \
    --to=rth@twiddle.net \
    --cc=afaerber@suse.de \
    --cc=cmetcalf@ezchip.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=riku.voipio@iki.fi \
    --cc=walt@tilera.com \
    --cc=xili_gchen_5257@hotmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.