All of lore.kernel.org
 help / color / mirror / Atom feed
From: matheus.ferst@eldorado.org.br
To: qemu-devel@nongnu.org, qemu-ppc@nongnu.org
Cc: danielhb413@gmail.com, richard.henderson@linaro.org,
	groug@kaod.org, clg@kaod.org,
	Matheus Ferst <matheus.ferst@eldorado.org.br>,
	david@gibson.dropbear.id.au
Subject: [PATCH v2 24/38] tcg/tcg-op-gvec.c: Introduce tcg_gen_gvec_4i
Date: Tue, 25 Jan 2022 09:19:29 -0300	[thread overview]
Message-ID: <20220125121943.3269077-25-matheus.ferst@eldorado.org.br> (raw)
In-Reply-To: <20220125121943.3269077-1-matheus.ferst@eldorado.org.br>

From: Matheus Ferst <matheus.ferst@eldorado.org.br>

Following the implementation of tcg_gen_gvec_3i, add a four-vector and
immediate operand expansion method.

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
---
 include/tcg/tcg-op-gvec.h |  22 ++++++
 tcg/tcg-op-gvec.c         | 146 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 168 insertions(+)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index da55fed870..28cafbcc5c 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -218,6 +218,25 @@ typedef struct {
     bool write_aofs;
 } GVecGen4;
 
+typedef struct {
+    /*
+     * Expand inline as a 64-bit or 32-bit integer. Only one of these will be
+     * non-NULL.
+     */
+    void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, int64_t);
+    void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, int32_t);
+    /* Expand inline with a host vector type.  */
+    void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, TCGv_vec, int64_t);
+    /* Expand out-of-line helper w/descriptor, data in descriptor.  */
+    gen_helper_gvec_4 *fno;
+    /* The optional opcodes, if any, utilized by .fniv.  */
+    const TCGOpcode *opt_opc;
+    /* The vector element size, if applicable.  */
+    uint8_t vece;
+    /* Prefer i64 to v64.  */
+    bool prefer_i64;
+} GVecGen4i;
+
 void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
                     uint32_t oprsz, uint32_t maxsz, const GVecGen2 *);
 void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
@@ -231,6 +250,9 @@ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs,
                      const GVecGen3i *);
 void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
                     uint32_t oprsz, uint32_t maxsz, const GVecGen4 *);
+void tcg_gen_gvec_4i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
+                     uint32_t oprsz, uint32_t maxsz, int64_t c,
+                     const GVecGen4i *);
 
 /* Expand a specific vector operation.  */
 
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index ffe55e908f..079a761b04 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -836,6 +836,30 @@ static void expand_4_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
     tcg_temp_free_i32(t0);
 }
 
+static void expand_4i_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+                          uint32_t cofs, uint32_t oprsz, int32_t c,
+                          void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32,
+                                      int32_t))
+{
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    TCGv_i32 t2 = tcg_temp_new_i32();
+    TCGv_i32 t3 = tcg_temp_new_i32();
+    uint32_t i;
+
+    for (i = 0; i < oprsz; i += 4) {
+        tcg_gen_ld_i32(t1, cpu_env, aofs + i);
+        tcg_gen_ld_i32(t2, cpu_env, bofs + i);
+        tcg_gen_ld_i32(t3, cpu_env, cofs + i);
+        fni(t0, t1, t2, t3, c);
+        tcg_gen_st_i32(t0, cpu_env, dofs + i);
+    }
+    tcg_temp_free_i32(t3);
+    tcg_temp_free_i32(t2);
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t0);
+}
+
 /* Expand OPSZ bytes worth of two-operand operations using i64 elements.  */
 static void expand_2_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
                          bool load_dest, void (*fni)(TCGv_i64, TCGv_i64))
@@ -971,6 +995,30 @@ static void expand_4_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
     tcg_temp_free_i64(t0);
 }
 
+static void expand_4i_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+                          uint32_t cofs, uint32_t oprsz, int64_t c,
+                          void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64,
+                                      int64_t))
+{
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+    TCGv_i64 t3 = tcg_temp_new_i64();
+    uint32_t i;
+
+    for (i = 0; i < oprsz; i += 8) {
+        tcg_gen_ld_i64(t1, cpu_env, aofs + i);
+        tcg_gen_ld_i64(t2, cpu_env, bofs + i);
+        tcg_gen_ld_i64(t3, cpu_env, cofs + i);
+        fni(t0, t1, t2, t3, c);
+        tcg_gen_st_i64(t0, cpu_env, dofs + i);
+    }
+    tcg_temp_free_i64(t3);
+    tcg_temp_free_i64(t2);
+    tcg_temp_free_i64(t1);
+    tcg_temp_free_i64(t0);
+}
+
 /* Expand OPSZ bytes worth of two-operand operations using host vectors.  */
 static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
                          uint32_t oprsz, uint32_t tysz, TCGType type,
@@ -1121,6 +1169,35 @@ static void expand_4_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
     tcg_temp_free_vec(t0);
 }
 
+/*
+ * Expand OPSZ bytes worth of four-vector operands and an immediate operand
+ * using host vectors.
+ */
+static void expand_4i_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+                          uint32_t bofs, uint32_t cofs, uint32_t oprsz,
+                          uint32_t tysz, TCGType type, int64_t c,
+                          void (*fni)(unsigned, TCGv_vec, TCGv_vec,
+                                     TCGv_vec, TCGv_vec, int64_t))
+{
+    TCGv_vec t0 = tcg_temp_new_vec(type);
+    TCGv_vec t1 = tcg_temp_new_vec(type);
+    TCGv_vec t2 = tcg_temp_new_vec(type);
+    TCGv_vec t3 = tcg_temp_new_vec(type);
+    uint32_t i;
+
+    for (i = 0; i < oprsz; i += tysz) {
+        tcg_gen_ld_vec(t1, cpu_env, aofs + i);
+        tcg_gen_ld_vec(t2, cpu_env, bofs + i);
+        tcg_gen_ld_vec(t3, cpu_env, cofs + i);
+        fni(vece, t0, t1, t2, t3, c);
+        tcg_gen_st_vec(t0, cpu_env, dofs + i);
+    }
+    tcg_temp_free_vec(t3);
+    tcg_temp_free_vec(t2);
+    tcg_temp_free_vec(t1);
+    tcg_temp_free_vec(t0);
+}
+
 /* Expand a vector two-operand operation.  */
 void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
                     uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g)
@@ -1533,6 +1610,75 @@ void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
     }
 }
 
+/* Expand a vector four-operand operation.  */
+void tcg_gen_gvec_4i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
+                     uint32_t oprsz, uint32_t maxsz, int64_t c,
+                     const GVecGen4i *g)
+{
+    const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty;
+    const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list);
+    TCGType type;
+    uint32_t some;
+
+    check_size_align(oprsz, maxsz, dofs | aofs | bofs | cofs);
+    check_overlap_4(dofs, aofs, bofs, cofs, maxsz);
+
+    type = 0;
+    if (g->fniv) {
+        type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64);
+    }
+    switch (type) {
+    case TCG_TYPE_V256:
+        /*
+         * Recall that ARM SVE allows vector sizes that are not a
+         * power of 2, but always a multiple of 16.  The intent is
+         * that e.g. size == 80 would be expanded with 2x32 + 1x16.
+         */
+        some = QEMU_ALIGN_DOWN(oprsz, 32);
+        expand_4i_vec(g->vece, dofs, aofs, bofs, cofs, some,
+                      32, TCG_TYPE_V256, c, g->fniv);
+        if (some == oprsz) {
+            break;
+        }
+        dofs += some;
+        aofs += some;
+        bofs += some;
+        cofs += some;
+        oprsz -= some;
+        maxsz -= some;
+        /* fallthru */
+    case TCG_TYPE_V128:
+        expand_4i_vec(g->vece, dofs, aofs, bofs, cofs, oprsz,
+                       16, TCG_TYPE_V128, c, g->fniv);
+        break;
+    case TCG_TYPE_V64:
+        expand_4i_vec(g->vece, dofs, aofs, bofs, cofs, oprsz,
+                      8, TCG_TYPE_V64, c, g->fniv);
+        break;
+
+    case 0:
+        if (g->fni8 && check_size_impl(oprsz, 8)) {
+            expand_4i_i64(dofs, aofs, bofs, cofs, oprsz, c, g->fni8);
+        } else if (g->fni4 && check_size_impl(oprsz, 4)) {
+            expand_4i_i32(dofs, aofs, bofs, cofs, oprsz, c, g->fni4);
+        } else {
+            assert(g->fno != NULL);
+            tcg_gen_gvec_4_ool(dofs, aofs, bofs, cofs,
+                               oprsz, maxsz, c, g->fno);
+            oprsz = maxsz;
+        }
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
+    tcg_swap_vecop_list(hold_list);
+
+    if (oprsz < maxsz) {
+        expand_clr(dofs + oprsz, maxsz - oprsz);
+    }
+}
+
 /*
  * Expand specific vector operations.
  */
-- 
2.25.1



  parent reply	other threads:[~2022-01-25 14:04 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-25 12:19 [PATCH v2 00/38] target/ppc: PowerISA Vector/VSX instruction batch matheus.ferst
2022-01-25 12:19 ` [PATCH v2 01/38] target/ppc: Introduce TRANS*FLAGS macros matheus.ferst
2022-01-25 12:19 ` [PATCH v2 02/38] target/ppc: moved vector even and odd multiplication to decodetree matheus.ferst
2022-01-25 12:19 ` [PATCH v2 03/38] target/ppc: Moved vector multiply high and low " matheus.ferst
2022-01-25 12:19 ` [PATCH v2 04/38] target/ppc: vmulh* instructions use gvec matheus.ferst
2022-01-25 12:19 ` [PATCH v2 05/38] target/ppc: Implement vmsumcud instruction matheus.ferst
2022-01-25 12:19 ` [PATCH v2 06/38] target/ppc: Implement vmsumudm instruction matheus.ferst
2022-01-25 12:19 ` [PATCH v2 07/38] target/ppc: Move vexts[bhw]2[wd] to decodetree matheus.ferst
2022-01-25 12:19 ` [PATCH v2 08/38] target/ppc: Implement vextsd2q matheus.ferst
2022-01-25 12:19 ` [PATCH v2 09/38] target/ppc: Move Vector Compare Equal/Not Equal/Greater Than to decodetree matheus.ferst
2022-01-25 12:19 ` [PATCH v2 10/38] target/ppc: Move Vector Compare Not Equal or Zero " matheus.ferst
2022-01-25 12:19 ` [PATCH v2 11/38] target/ppc: Implement Vector Compare Equal Quadword matheus.ferst
2022-01-25 12:19 ` [PATCH v2 12/38] target/ppc: Implement Vector Compare Greater Than Quadword matheus.ferst
2022-01-25 12:19 ` [PATCH v2 13/38] target/ppc: Implement Vector Compare Quadword matheus.ferst
2022-01-25 12:19 ` [PATCH v2 14/38] target/ppc: implement vstri[bh][lr] matheus.ferst
2022-01-25 12:19 ` [PATCH v2 15/38] target/ppc: implement vclrlb matheus.ferst
2022-01-25 12:19 ` [PATCH v2 16/38] target/ppc: implement vclrrb matheus.ferst
2022-01-25 12:19 ` [PATCH v2 17/38] target/ppc: implement vcntmb[bhwd] matheus.ferst
2022-01-25 12:19 ` [PATCH v2 18/38] target/ppc: implement vgnb matheus.ferst
2022-01-25 12:19 ` [PATCH v2 19/38] target/ppc: Move vsel and vperm/vpermr to decodetree matheus.ferst
2022-01-25 12:19 ` [PATCH v2 20/38] target/ppc: Move xxsel " matheus.ferst
2022-01-25 12:19 ` [PATCH v2 21/38] target/ppc: move xxperm/xxpermr " matheus.ferst
2022-01-25 12:19 ` [PATCH v2 22/38] target/ppc: Move xxpermdi " matheus.ferst
2022-01-25 12:19 ` [PATCH v2 23/38] target/ppc: Implement xxpermx instruction matheus.ferst
2022-01-25 12:19 ` matheus.ferst [this message]
2022-01-25 12:19 ` [PATCH v2 25/38] target/ppc: Implement xxeval matheus.ferst
2022-01-25 12:19 ` [PATCH v2 26/38] target/ppc: Implement xxgenpcv[bhwd]m instruction matheus.ferst
2022-01-25 12:19 ` [PATCH v2 27/38] target/ppc: move xs[n]madd[am][ds]p/xs[n]msub[am][ds]p to decodetree matheus.ferst
2022-01-25 12:19 ` [PATCH v2 28/38] target/ppc: implement xs[n]maddqp[o]/xs[n]msubqp[o] matheus.ferst
2022-01-25 12:19 ` [PATCH v2 29/38] target/ppc: Implement xvtlsbb instruction matheus.ferst
2022-01-25 12:19 ` [PATCH v2 30/38] target/ppc: Remove xscmpnedp instruction matheus.ferst
2022-01-25 12:19 ` [PATCH v2 31/38] target/ppc: Refactor VSX_SCALAR_CMP_DP matheus.ferst
2022-01-25 12:19 ` [PATCH v2 32/38] target/ppc: Implement xscmp{eq,ge,gt}qp matheus.ferst
2022-01-25 12:19 ` [PATCH v2 33/38] target/ppc: Implement do_helper_XX3 and move xxperm* to use it matheus.ferst
2022-01-25 12:19 ` [PATCH v2 34/38] target/ppc: Move xscmp{eq,ge,gt}dp to decodetree matheus.ferst
2022-01-25 12:19 ` [PATCH v2 35/38] target/ppc: Move xs{max, min}[cj]dp to use do_helper_XX3 matheus.ferst
2022-01-25 12:19 ` [PATCH v2 36/38] target/ppc: Refactor VSX_MAX_MINC helper matheus.ferst
2022-01-25 12:19 ` [PATCH v2 37/38] target/ppc: Implement xs{max,min}cqp matheus.ferst
2022-01-25 12:19 ` [PATCH v2 38/38] target/ppc: Implement xvcvbf16spn and xvcvspbf16 instructions matheus.ferst

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220125121943.3269077-25-matheus.ferst@eldorado.org.br \
    --to=matheus.ferst@eldorado.org.br \
    --cc=clg@kaod.org \
    --cc=danielhb413@gmail.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=groug@kaod.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.