All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: David Hildenbrand <david@redhat.com>
Subject: [Qemu-devel] [PATCH v2 01/29] tcg: Implement tcg_gen_gvec_3i()
Date: Tue, 30 Apr 2019 22:05:08 -0700	[thread overview]
Message-ID: <20190501050536.15580-2-richard.henderson@linaro.org> (raw)
In-Reply-To: <20190501050536.15580-1-richard.henderson@linaro.org>

From: David Hildenbrand <david@redhat.com>

Let's add tcg_gen_gvec_3i(), similar to tcg_gen_gvec_2i(), however
without introducing "gen_helper_gvec_3i *fnoi", as it isn't needed
for now.

Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20190416185301.25344-2-david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg-op-gvec.h |  24 ++++++++
 tcg/tcg-op-gvec.c | 139 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 163 insertions(+)

diff --git a/tcg/tcg-op-gvec.h b/tcg/tcg-op-gvec.h
index 850da32ded..c093243c4c 100644
--- a/tcg/tcg-op-gvec.h
+++ b/tcg/tcg-op-gvec.h
@@ -164,6 +164,27 @@ typedef struct {
     bool load_dest;
 } GVecGen3;
 
+typedef struct {
+    /*
+     * Expand inline as a 64-bit or 32-bit integer; args are (d, a, b, imm).
+     * Only one of these will be non-NULL.
+     */
+    void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64, int64_t);
+    void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32, int32_t);
+    /* Expand inline with a host vector type: (vece, d, a, b, imm).  */
+    void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, int64_t);
+    /* Expand out-of-line helper w/descriptor, data in descriptor.  */
+    gen_helper_gvec_3 *fno;
+    /* The opcode, if any, to which this corresponds.  */
+    TCGOpcode opc;
+    /* The vector element size, if applicable.  */
+    uint8_t vece;
+    /* Prefer i64 to v64.  */
+    bool prefer_i64;
+    /* Load dest first, so that it acts as a 3rd source operand.  */
+    bool load_dest;
+} GVecGen3i;
+
 typedef struct {
     /* Expand inline as a 64-bit or 32-bit integer.
        Only one of these will be non-NULL.  */
@@ -193,6 +214,9 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
                      uint32_t maxsz, TCGv_i64 c, const GVecGen2s *);
 void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
                     uint32_t oprsz, uint32_t maxsz, const GVecGen3 *);
+void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+                     uint32_t oprsz, uint32_t maxsz, int64_t c,
+                     const GVecGen3i *);
 void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
                     uint32_t oprsz, uint32_t maxsz, const GVecGen4 *);
 
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index 0996ef0812..f831adb4e7 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -663,6 +663,29 @@ static void expand_3_i32(uint32_t dofs, uint32_t aofs,
     tcg_temp_free_i32(t0);
 }
 
+static void expand_3i_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+                          uint32_t oprsz, int32_t c, bool load_dest,
+                          void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32, int32_t))
+{
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    TCGv_i32 t2 = tcg_temp_new_i32();
+    uint32_t i;
+    /* Walk oprsz bytes in 4-byte steps, applying fni(d, a, b, c) to each.  */
+    for (i = 0; i < oprsz; i += 4) {
+        tcg_gen_ld_i32(t0, cpu_env, aofs + i);
+        tcg_gen_ld_i32(t1, cpu_env, bofs + i);
+        if (load_dest) { /* preload t2 with the current dest value */
+            tcg_gen_ld_i32(t2, cpu_env, dofs + i);
+        }
+        fni(t2, t0, t1, c);
+        tcg_gen_st_i32(t2, cpu_env, dofs + i);
+    }
+    tcg_temp_free_i32(t0);
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t2);
+}
+
 /* Expand OPSZ bytes worth of three-operand operations using i32 elements.  */
 static void expand_4_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
                          uint32_t cofs, uint32_t oprsz, bool write_aofs,
@@ -770,6 +793,29 @@ static void expand_3_i64(uint32_t dofs, uint32_t aofs,
     tcg_temp_free_i64(t0);
 }
 
+static void expand_3i_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+                          uint32_t oprsz, int64_t c, bool load_dest,
+                          void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64, int64_t))
+{
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+    uint32_t i;
+    /* Walk oprsz bytes in 8-byte steps, applying fni(d, a, b, c) to each.  */
+    for (i = 0; i < oprsz; i += 8) {
+        tcg_gen_ld_i64(t0, cpu_env, aofs + i);
+        tcg_gen_ld_i64(t1, cpu_env, bofs + i);
+        if (load_dest) { /* preload t2 with the current dest value */
+            tcg_gen_ld_i64(t2, cpu_env, dofs + i);
+        }
+        fni(t2, t0, t1, c);
+        tcg_gen_st_i64(t2, cpu_env, dofs + i);
+    }
+    tcg_temp_free_i64(t0);
+    tcg_temp_free_i64(t1);
+    tcg_temp_free_i64(t2);
+}
+
 /* Expand OPSZ bytes worth of three-operand operations using i64 elements.  */
 static void expand_4_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
                          uint32_t cofs, uint32_t oprsz, bool write_aofs,
@@ -883,6 +929,35 @@ static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
     tcg_temp_free_vec(t0);
 }
 
+/*
+ * Expand OPSZ bytes worth of operations taking three vector operands and
+ * an immediate, using host vectors of TYPE and stepping TYSZ bytes each.
+ */
+static void expand_3i_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+                          uint32_t bofs, uint32_t oprsz, uint32_t tysz,
+                          TCGType type, int64_t c, bool load_dest,
+                          void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec,
+                                      int64_t))
+{
+    TCGv_vec t0 = tcg_temp_new_vec(type);
+    TCGv_vec t1 = tcg_temp_new_vec(type);
+    TCGv_vec t2 = tcg_temp_new_vec(type);
+    uint32_t i;
+
+    for (i = 0; i < oprsz; i += tysz) {
+        tcg_gen_ld_vec(t0, cpu_env, aofs + i);
+        tcg_gen_ld_vec(t1, cpu_env, bofs + i);
+        if (load_dest) { /* preload t2 with the current dest value */
+            tcg_gen_ld_vec(t2, cpu_env, dofs + i);
+        }
+        fni(vece, t2, t0, t1, c);
+        tcg_gen_st_vec(t2, cpu_env, dofs + i);
+    }
+    tcg_temp_free_vec(t0);
+    tcg_temp_free_vec(t1);
+    tcg_temp_free_vec(t2);
+}
+
 /* Expand OPSZ bytes worth of four-operand operations using host vectors.  */
 static void expand_4_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
                          uint32_t bofs, uint32_t cofs, uint32_t oprsz,
@@ -1174,6 +1249,70 @@ void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
     }
 }
 
+/* Expand a vector operation on three vectors (d, a, b) and immediate c.  */
+void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+                     uint32_t oprsz, uint32_t maxsz, int64_t c,
+                     const GVecGen3i *g)
+{
+    TCGType type;
+    uint32_t some;
+
+    check_size_align(oprsz, maxsz, dofs | aofs | bofs);
+    check_overlap_3(dofs, aofs, bofs, maxsz);
+    /* Only consider a host vector type when fniv is provided.  */
+    type = 0;
+    if (g->fniv) {
+        type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64);
+    }
+    switch (type) {
+    case TCG_TYPE_V256:
+        /*
+         * Recall that ARM SVE allows vector sizes that are not a
+         * power of 2, but always a multiple of 16.  The intent is
+         * that e.g. size == 80 would be expanded with 2x32 + 1x16.
+         */
+        some = QEMU_ALIGN_DOWN(oprsz, 32);
+        expand_3i_vec(g->vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256,
+                      c, g->load_dest, g->fniv);
+        if (some == oprsz) {
+            break;
+        }
+        dofs += some;
+        aofs += some;
+        bofs += some;
+        oprsz -= some;
+        maxsz -= some;
+        /* fallthru */
+    case TCG_TYPE_V128:
+        expand_3i_vec(g->vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128,
+                      c, g->load_dest, g->fniv);
+        break;
+    case TCG_TYPE_V64:
+        expand_3i_vec(g->vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64,
+                      c, g->load_dest, g->fniv);
+        break;
+    /* No host vector type: try i64, then i32, else the out-of-line helper.  */
+    case 0:
+        if (g->fni8 && check_size_impl(oprsz, 8)) {
+            expand_3i_i64(dofs, aofs, bofs, oprsz, c, g->load_dest, g->fni8);
+        } else if (g->fni4 && check_size_impl(oprsz, 4)) {
+            expand_3i_i32(dofs, aofs, bofs, oprsz, c, g->load_dest, g->fni4);
+        } else {
+            assert(g->fno != NULL);
+            tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, c, g->fno);
+            return;
+        }
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
+    /* Inline expansions only: expand_clr covers the tail, oprsz..maxsz.  */
+    if (oprsz < maxsz) {
+        expand_clr(dofs + oprsz, maxsz - oprsz);
+    }
+}
+
 /* Expand a vector four-operand operation.  */
 void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
                     uint32_t oprsz, uint32_t maxsz, const GVecGen4 *g)
-- 
2.17.1

  reply	other threads:[~2019-05-01  5:05 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-01  5:05 [Qemu-devel] [PATCH v2 00/29] tcg vector improvements Richard Henderson
2019-05-01  5:05 ` Richard Henderson [this message]
2019-05-01 15:23   ` [Qemu-devel] [PATCH v2 01/29] tcg: Implement tcg_gen_gvec_3i() Alex Bennée
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 02/29] tcg: Do not recreate INDEX_op_neg_vec unless supported Richard Henderson
2019-05-01 15:26   ` Alex Bennée
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 03/29] tcg: Allow add_vec, sub_vec, neg_vec, not_vec to be expanded Richard Henderson
2019-05-01 15:56   ` Alex Bennée
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 04/29] tcg: Specify optional vector requirements with a list Richard Henderson
2019-05-01 17:24   ` Alex Bennée
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 05/29] tcg: Assert fixed_reg is read-only Richard Henderson
2019-05-01 17:26   ` Alex Bennée
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 06/29] tcg: Return bool success from tcg_out_mov Richard Henderson
2019-05-01 17:29   ` Alex Bennée
2019-05-01 20:31     ` Richard Henderson
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 07/29] tcg: Support cross-class moves without instruction support Richard Henderson
2019-05-01 17:34   ` Alex Bennée
2019-05-01 20:18     ` Richard Henderson
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 08/29] tcg: Promote tcg_out_{dup, dupi}_vec to backend interface Richard Henderson
2019-05-01 17:37   ` Alex Bennée
2019-05-01 20:21     ` Richard Henderson
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 09/29] tcg: Manually expand INDEX_op_dup_vec Richard Henderson
2019-05-02  9:42   ` Alex Bennée
2019-05-02 15:24     ` Richard Henderson
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 10/29] tcg: Add tcg_out_dupm_vec to the backend interface Richard Henderson
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 11/29] tcg/i386: Implement tcg_out_dupm_vec Richard Henderson
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 12/29] tcg/aarch64: " Richard Henderson
2019-05-02 13:26   ` Alex Bennée
2019-05-02 15:35     ` Richard Henderson
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 13/29] tcg: Add INDEX_op_dup_mem_vec Richard Henderson
2019-05-02 13:30   ` Alex Bennée
2019-05-02 15:38     ` Richard Henderson
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 14/29] tcg: Add gvec expanders for variable shift Richard Henderson
2019-05-02 14:08   ` Alex Bennée
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 15/29] tcg/i386: Support vector variable shift opcodes Richard Henderson
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 16/29] tcg/aarch64: " Richard Henderson
2019-05-02 14:12   ` Alex Bennée
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 17/29] tcg: Add gvec expanders for vector shift by scalar Richard Henderson
2019-05-02 14:37   ` Alex Bennée
2019-05-02 15:46     ` Richard Henderson
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 18/29] tcg/i386: Support vector scalar shift opcodes Richard Henderson
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 19/29] tcg: Add support for integer absolute value Richard Henderson
2019-05-02 15:25   ` Alex Bennée
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 20/29] tcg: Add support for vector " Richard Henderson
2019-05-02 15:47   ` Alex Bennée
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 21/29] tcg/i386: Support " Richard Henderson
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 22/29] tcg/aarch64: " Richard Henderson
2019-05-02 15:49   ` Alex Bennée
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 23/29] target/arm: Use tcg_gen_abs_i64 and tcg_gen_gvec_abs Richard Henderson
2019-05-01  5:05   ` Richard Henderson
2019-05-02 16:07   ` [Qemu-devel] [Qemu-arm] " Alex Bennée
2019-05-02 16:07     ` Alex Bennée
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 24/29] target/cris: Use tcg_gen_abs_tl Richard Henderson
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 25/29] target/ppc: Use tcg_gen_abs_i32 Richard Henderson
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 26/29] target/ppc: Use tcg_gen_abs_tl Richard Henderson
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 27/29] target/s390x: Use tcg_gen_abs_i64 Richard Henderson
2019-05-02 13:44   ` David Hildenbrand
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 28/29] target/tricore: Use tcg_gen_abs_tl Richard Henderson
2019-05-01  5:05 ` [Qemu-devel] [PATCH v2 29/29] target/xtensa: Use tcg_gen_abs_i32 Richard Henderson
2019-05-01 15:15   ` Max Filippov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190501050536.15580-2-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=david@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.