All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: david@redhat.com
Subject: [Qemu-devel] [PATCH 07/38] tcg: Manually expand INDEX_op_dup_vec
Date: Fri, 19 Apr 2019 21:34:11 -1000	[thread overview]
Message-ID: <20190420073442.7488-8-richard.henderson@linaro.org> (raw)
In-Reply-To: <20190420073442.7488-1-richard.henderson@linaro.org>

This case is similar to INDEX_op_mov_* in that we need to do
different things depending on the current location of the source.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.inc.c |   9 ++--
 tcg/i386/tcg-target.inc.c    |   8 ++-
 tcg/tcg.c                    | 102 +++++++++++++++++++++++++++++++++++
 3 files changed, 109 insertions(+), 10 deletions(-)

diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 116ebd8c1a..b272822969 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -2104,10 +2104,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
     case INDEX_op_mov_i64:
-    case INDEX_op_mov_vec:
     case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
     case INDEX_op_movi_i64:
-    case INDEX_op_dupi_vec:
     case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
     default:
         g_assert_not_reached();
@@ -2204,9 +2202,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_not_vec:
         tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
         break;
-    case INDEX_op_dup_vec:
-        tcg_out_dup_vec(s, type, vece, a0, a1);
-        break;
     case INDEX_op_shli_vec:
         tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
         break;
@@ -2250,6 +2245,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
             }
         }
         break;
+
+    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
+    case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi.  */
+    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
     default:
         g_assert_not_reached();
     }
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 04e3d37b05..49691c4f56 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -2601,10 +2601,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
     case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
     case INDEX_op_mov_i64:
-    case INDEX_op_mov_vec:
     case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
     case INDEX_op_movi_i64:
-    case INDEX_op_dupi_vec:
     case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
     default:
         tcg_abort();
@@ -2793,9 +2791,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_st_vec:
         tcg_out_st(s, type, a0, a1, a2);
         break;
-    case INDEX_op_dup_vec:
-        tcg_out_dup_vec(s, type, vece, a0, a1);
-        break;
 
     case INDEX_op_x86_shufps_vec:
         insn = OPC_SHUFPS;
@@ -2837,6 +2832,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         tcg_out8(s, a2);
         break;
 
+    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
+    case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi.  */
+    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
     default:
         g_assert_not_reached();
     }
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 5ed9c7bee5..55498b63d7 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -3410,6 +3410,105 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
     }
 }
 
+static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
+{
+    const TCGLifeData arg_life = op->life;
+    TCGRegSet dup_out_regs, dup_in_regs;
+    TCGTemp *its, *ots;
+    TCGType itype, vtype;
+    unsigned vece;
+    bool ok;
+
+    ots = arg_temp(op->args[0]);
+    its = arg_temp(op->args[1]);
+
+    /* There should be no fixed vector registers.  */
+    tcg_debug_assert(!ots->fixed_reg);
+
+    itype = its->type;
+    vece = TCGOP_VECE(op);
+    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
+
+    if (its->val_type == TEMP_VAL_CONST) {
+        /* Propagate constant via movi -> dupi.  */
+        tcg_target_ulong val = its->val;
+        if (IS_DEAD_ARG(1)) {
+            temp_dead(s, its);
+        }
+        tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
+        return;
+    }
+
+    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
+    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;
+
+    /* Allocate the output register now.  */
+    if (ots->val_type != TEMP_VAL_REG) {
+        TCGRegSet allocated_regs = s->reserved_regs;
+
+        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
+            /* Make sure to not spill the input register. */
+            tcg_regset_set_reg(allocated_regs, its->reg);
+        }
+        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
+                                 op->output_pref[0], ots->indirect_base);
+        ots->val_type = TEMP_VAL_REG;
+        ots->mem_coherent = 0;
+        s->reg_to_temp[ots->reg] = ots;
+    }
+
+    switch (its->val_type) {
+    case TEMP_VAL_REG:
+        /*
+         * The dup constriaints must be broad, covering all possible VECE.
+         * However, tcg_op_dup_vec() gets to see the VECE and we allow it
+         * to fail, indicating that extra moves are required for that case.
+         */
+        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
+            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
+                goto done;
+            }
+            /* Try again from memory or a vector input register.  */
+        }
+        if (!its->mem_coherent) {
+            /*
+             * The input register is not synced, and so an extra store
+             * would be required to use memory.  Attempt an integer-vector
+             * register move first.  We do not have a TCGRegSet for this.
+             */
+            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
+                break;
+            }
+            /* Sync the temp back to its slot and load from there.  */
+            temp_sync(s, its, s->reserved_regs, 0, 0);
+        }
+        /* fall through */
+
+    case TEMP_VAL_MEM:
+        /* TODO: dup from memory */
+        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
+
+    /* We now have a vector input register, so dup must succeed. */
+    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
+    tcg_debug_assert(ok);
+
+ done:
+    if (IS_DEAD_ARG(1)) {
+        temp_dead(s, its);
+    }
+    if (NEED_SYNC_ARG(0)) {
+        temp_sync(s, ots, s->reserved_regs, 0, 0);
+    }
+    if (IS_DEAD_ARG(0)) {
+        temp_dead(s, ots);
+    }
+}
+
 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
 {
     const TCGLifeData arg_life = op->life;
@@ -3978,6 +4077,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
         case INDEX_op_dupi_vec:
             tcg_reg_alloc_movi(s, op);
             break;
+        case INDEX_op_dup_vec:
+            tcg_reg_alloc_dup(s, op);
+            break;
         case INDEX_op_insn_start:
             if (num_insns >= 0) {
                 size_t off = tcg_current_code_size(s);
-- 
2.17.1

  parent reply	other threads:[~2019-04-20  7:35 UTC|newest]

Thread overview: 69+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-20  7:34 [Qemu-devel] [PATCH 00/38] tcg vector improvements Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 01/38] target/arm: Fill in .opc for cmtst_op Richard Henderson
2019-04-23  8:00   ` David Hildenbrand
2019-04-20  7:34 ` [Qemu-devel] [PATCH 02/38] tcg: Assert fixed_reg is read-only Richard Henderson
2019-04-23  8:03   ` David Hildenbrand
2019-04-20  7:34 ` [Qemu-devel] [PATCH 03/38] tcg: Return bool success from tcg_out_mov Richard Henderson
2019-04-20 10:56   ` Philippe Mathieu-Daudé
2019-04-23  8:27   ` David Hildenbrand
2019-04-20  7:34 ` [Qemu-devel] [PATCH 04/38] tcg: Support cross-class moves without instruction support Richard Henderson
2019-04-23  8:29   ` David Hildenbrand
2019-04-20  7:34 ` [Qemu-devel] [PATCH 05/38] tcg: Allow add_vec, sub_vec, neg_vec, not_vec to be expanded Richard Henderson
2019-04-23  8:33   ` David Hildenbrand
2019-04-20  7:34 ` [Qemu-devel] [PATCH 06/38] tcg: Promote tcg_out_{dup, dupi}_vec to backend interface Richard Henderson
2019-04-20  7:34 ` Richard Henderson [this message]
2019-04-20  7:34 ` [Qemu-devel] [PATCH 08/38] tcg: Add tcg_out_dupm_vec to the " Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 09/38] tcg/i386: Implement tcg_out_dupm_vec Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 10/38] tcg/aarch64: " Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 11/38] tcg: Add INDEX_op_dup_mem_vec Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 12/38] tcg: Add gvec expanders for variable shift Richard Henderson
2019-04-23 19:04   ` David Hildenbrand
2019-04-23 19:28     ` Richard Henderson
2019-04-23 21:02       ` David Hildenbrand
2019-04-23 21:40         ` Richard Henderson
2019-04-23 21:57           ` David Hildenbrand
2019-04-20  7:34 ` [Qemu-devel] [PATCH 13/38] tcg/i386: Support vector variable shift opcodes Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 14/38] tcg/aarch64: " Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 15/38] tcg: Implement tcg_gen_gvec_3i() Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 16/38] tcg: Specify optional vector requirements with a list Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 17/38] tcg: Add gvec expanders for vector shift by scalar Richard Henderson
2019-04-23 18:58   ` David Hildenbrand
2019-04-23 19:21     ` Richard Henderson
2019-04-23 21:05       ` David Hildenbrand
2019-04-20  7:34 ` [Qemu-devel] [PATCH 18/38] tcg/i386: Support vector scalar shift opcodes Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 19/38] tcg: Add support for integer absolute value Richard Henderson
2019-04-23  8:52   ` Philippe Mathieu-Daudé
2019-04-23 18:37   ` David Hildenbrand
2019-04-23 22:09     ` Philippe Mathieu-Daudé
2019-04-23 22:29       ` Richard Henderson
2019-04-23 23:05         ` Philippe Mathieu-Daudé
2019-04-20  7:34 ` [Qemu-devel] [PATCH 20/38] tcg: Add support for vector " Richard Henderson
2019-04-23 18:35   ` David Hildenbrand
2019-04-20  7:34 ` [Qemu-devel] [PATCH 21/38] target/arm: Use tcg_gen_abs_i64 and tcg_gen_gvec_abs Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 22/38] target/cris: Use tcg_gen_abs_tl Richard Henderson
2019-04-23 10:09   ` Philippe Mathieu-Daudé
2019-04-20  7:34 ` [Qemu-devel] [PATCH 23/38] target/ppc: " Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 24/38] target/s390x: Use tcg_gen_abs_i64 Richard Henderson
2019-04-23 18:40   ` David Hildenbrand
2019-04-23 22:12   ` Philippe Mathieu-Daudé
2019-04-20  7:34 ` [Qemu-devel] [PATCH 25/38] target/xtensa: Use tcg_gen_abs_i32 Richard Henderson
2019-04-23 22:14   ` Philippe Mathieu-Daudé
2019-04-20  7:34 ` [Qemu-devel] [PATCH 26/38] tcg/i386: Support vector absolute value Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 27/38] tcg/aarch64: " Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 28/38] tcg: Add support for vector comparison select Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 29/38] tcg/i386: Support vector comparison select value Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 30/38] tcg/aarch64: " Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 31/38] target/ppc: Use vector variable shifts for VS{L, R, RA}{B, H, W, D} Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 32/38] target/arm: Vectorize USHL and SSHL Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 33/38] tcg/aarch64: Do not advertise minmax for MO_64 Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 34/38] tcg: Do not recreate INDEX_op_neg_vec unless supported Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 35/38] tcg: Introduce do_op3_nofail for vector expansion Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 36/38] tcg: Expand vector minmax using cmp+cmpsel Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 37/38] tcg/aarch64: Use MVNI for expansion of dupi Richard Henderson
2019-04-20  7:34 ` [Qemu-devel] [PATCH 38/38] tcg/aarch64: Use ORRI and BICI for vector logical operations Richard Henderson
2019-04-20  8:09 ` [Qemu-devel] [PATCH 00/38] tcg vector improvements no-reply
2019-04-23 19:15 ` David Hildenbrand
2019-04-23 20:26   ` Richard Henderson
2019-04-23 20:31     ` David Hildenbrand
2019-04-29 19:28 ` David Hildenbrand
2019-04-29 20:19   ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190420073442.7488-8-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=david@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.