All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Lucas Mateus Castro(alqotel)" <lucas.araujo@eldorado.org.br>
To: qemu-ppc@nongnu.org
Cc: richard.henderson@linaro.org,
	"Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>,
	"Cédric Le Goater" <clg@kaod.org>,
	"Daniel Henrique Barboza" <danielhb413@gmail.com>,
	"David Gibson" <david@gibson.dropbear.id.au>,
	"Greg Kurz" <groug@kaod.org>,
	qemu-devel@nongnu.org (open list:All patches CC here)
Subject: [PATCH v4 2/8] target/ppc: Implemented xvi*ger* instructions
Date: Fri, 20 May 2022 10:51:23 -0300	[thread overview]
Message-ID: <20220520135129.63664-3-lucas.araujo@eldorado.org.br> (raw)
In-Reply-To: <20220520135129.63664-1-lucas.araujo@eldorado.org.br>

From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>

Implement the following PowerISA v3.1 instructions:
xvi4ger8:     VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update)
xvi4ger8pp:   VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update)
Positive multiply, Positive accumulate
xvi8ger4:     VSX Vector 4-bit Signed Integer GER (rank-8 update)
xvi8ger4pp:   VSX Vector 4-bit Signed Integer GER (rank-8 update)
Positive multiply, Positive accumulate
xvi8ger4spp:  VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update)
with Saturate Positive multiply, Positive accumulate
xvi16ger2:    VSX Vector 16-bit Signed Integer GER (rank-2 update)
xvi16ger2pp:  VSX Vector 16-bit Signed Integer GER (rank-2 update)
Positive multiply, Positive accumulate
xvi16ger2s:   VSX Vector 16-bit Signed Integer GER (rank-2 update)
with Saturation
xvi16ger2spp: VSX Vector 16-bit Signed Integer GER (rank-2 update)
with Saturation Positive multiply, Positive accumulate

Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/ppc/cpu.h                    |   1 +
 target/ppc/helper.h                 |  13 +++
 target/ppc/insn32.decode            |  18 ++++
 target/ppc/int_helper.c             | 130 ++++++++++++++++++++++++++++
 target/ppc/internal.h               |  15 ++++
 target/ppc/translate/vsx-impl.c.inc |  41 +++++++++
 6 files changed, 218 insertions(+)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 2e80d0978f..c8a12a3985 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -238,6 +238,7 @@ typedef union _ppc_vsr_t {
 
 typedef ppc_vsr_t ppc_avr_t;
 typedef ppc_vsr_t ppc_fprp_t;
+typedef ppc_vsr_t ppc_acc_t;
 
 #if !defined(CONFIG_USER_ONLY)
 /* Software TLB cache */
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index aa6773c4a5..29354276f0 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -133,6 +133,10 @@ DEF_HELPER_FLAGS_1(ftsqrt, TCG_CALL_NO_RWG_SE, i32, i64)
 #define dh_ctype_vsr ppc_vsr_t *
 #define dh_typecode_vsr dh_typecode_ptr
 
+#define dh_alias_acc ptr
+#define dh_ctype_acc ppc_acc_t *
+#define dh_typecode_acc dh_typecode_ptr
+
 DEF_HELPER_3(vavgub, void, avr, avr, avr)
 DEF_HELPER_3(vavguh, void, avr, avr, avr)
 DEF_HELPER_3(vavguw, void, avr, avr, avr)
@@ -537,6 +541,15 @@ DEF_HELPER_5(XXBLENDVB, void, vsr, vsr, vsr, vsr, i32)
 DEF_HELPER_5(XXBLENDVH, void, vsr, vsr, vsr, vsr, i32)
 DEF_HELPER_5(XXBLENDVW, void, vsr, vsr, vsr, vsr, i32)
 DEF_HELPER_5(XXBLENDVD, void, vsr, vsr, vsr, vsr, i32)
+DEF_HELPER_5(XVI4GER8, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVI4GER8PP, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVI8GER4, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVI8GER4PP, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVI8GER4SPP, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVI16GER2, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVI16GER2S, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVI16GER2PP, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVI16GER2SPP, void, env, vsr, vsr, acc, i32)
 
 DEF_HELPER_2(efscfsi, i32, env, i32)
 DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 7a76bedfa6..899a04bf77 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -170,6 +170,12 @@
 &XX3            xt xa xb
 @XX3            ...... ..... ..... ..... ........ ...           &XX3 xt=%xx_xt xa=%xx_xa xb=%xx_xb
 
+# 32 bit GER instructions have all mask bits considered 1
+&MMIRR_XX3      xa xb xt pmsk xmsk ymsk
+%xx_at          23:3
+@XX3_at         ...... ... .. ..... ..... ........ ...          &MMIRR_XX3 xt=%xx_at xb=%xx_xb \
+                                                                pmsk=255 xmsk=15 ymsk=15
+
 &XX3_dm         xt xa xb dm
 @XX3_dm         ...... ..... ..... ..... . dm:2 ..... ...       &XX3_dm xt=%xx_xt xa=%xx_xa xb=%xx_xb
 
@@ -719,3 +725,15 @@ RFEBB           010011-------------- .   0010010010 -   @XL_s
 XXMFACC         011111 ... -- 00000 ----- 0010110001 -   @X_a
 XXMTACC         011111 ... -- 00001 ----- 0010110001 -   @X_a
 XXSETACCZ       011111 ... -- 00011 ----- 0010110001 -   @X_a
+
+## VSX GER instruction
+
+XVI4GER8        111011 ... -- ..... ..... 00100011 ..-  @XX3_at xa=%xx_xa
+XVI4GER8PP      111011 ... -- ..... ..... 00100010 ..-  @XX3_at xa=%xx_xa
+XVI8GER4        111011 ... -- ..... ..... 00000011 ..-  @XX3_at xa=%xx_xa
+XVI8GER4PP      111011 ... -- ..... ..... 00000010 ..-  @XX3_at xa=%xx_xa
+XVI16GER2       111011 ... -- ..... ..... 01001011 ..-  @XX3_at xa=%xx_xa
+XVI16GER2PP     111011 ... -- ..... ..... 01101011 ..-  @XX3_at xa=%xx_xa
+XVI8GER4SPP     111011 ... -- ..... ..... 01100011 ..-  @XX3_at xa=%xx_xa
+XVI16GER2S      111011 ... -- ..... ..... 00101011 ..-  @XX3_at xa=%xx_xa
+XVI16GER2SPP    111011 ... -- ..... ..... 00101010 ..-  @XX3_at xa=%xx_xa
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 8c1674510b..32a7d99718 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -782,6 +782,136 @@ VCT(uxs, cvtsduw, u32)
 VCT(sxs, cvtsdsw, s32)
 #undef VCT
 
+typedef int64_t do_ger(uint32_t, uint32_t, uint32_t);
+
+static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask)
+{
+    int64_t psum = 0;
+    for (int i = 0; i < 8; i++, mask >>= 1) {
+        if (mask & 1) {
+            psum += sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4);
+        }
+    }
+    return psum;
+}
+
+static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask)
+{
+    int64_t psum = 0;
+    for (int i = 0; i < 4; i++, mask >>= 1) {
+        if (mask & 1) {
+            psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8);
+        }
+    }
+    return psum;
+}
+
+static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask)
+{
+    int64_t psum = 0;
+    for (int i = 0; i < 2; i++, mask >>= 1) {
+        if (mask & 1) {
+            psum += sextract32(a, 16 * i, 16) * sextract32(b, 16 * i, 16);
+        }
+    }
+    return psum;
+}
+
+static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t  *at,
+                   uint32_t mask, bool sat, bool acc, do_ger ger)
+{
+    uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK),
+            xmsk = FIELD_EX32(mask, GER_MSK, XMSK),
+            ymsk = FIELD_EX32(mask, GER_MSK, YMSK);
+    uint8_t xmsk_bit, ymsk_bit;
+    int64_t psum;
+    int i, j;
+    for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {
+        for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {
+            if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {
+                psum = ger(a->VsrW(i), b->VsrW(j), pmsk);
+                if (acc) {
+                    psum += at[i].VsrSW(j);
+                }
+                if (sat && psum > INT32_MAX) {
+                    set_vscr_sat(env);
+                    at[i].VsrSW(j) = INT32_MAX;
+                } else if (sat && psum < INT32_MIN) {
+                    set_vscr_sat(env);
+                    at[i].VsrSW(j) = INT32_MIN;
+                } else {
+                    at[i].VsrSW(j) = (int32_t) psum;
+                }
+            } else {
+                at[i].VsrSW(j) = 0;
+            }
+        }
+    }
+}
+
+QEMU_FLATTEN
+void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                     ppc_acc_t *at, uint32_t mask)
+{
+    xviger(env, a, b, at, mask, false, false, ger_rank8);
+}
+
+QEMU_FLATTEN
+void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                       ppc_acc_t *at, uint32_t mask)
+{
+    xviger(env, a, b, at, mask, false, true, ger_rank8);
+}
+
+QEMU_FLATTEN
+void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                     ppc_acc_t *at, uint32_t mask)
+{
+    xviger(env, a, b, at, mask, false, false, ger_rank4);
+}
+
+QEMU_FLATTEN
+void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                       ppc_acc_t *at, uint32_t mask)
+{
+    xviger(env, a, b, at, mask, false, true, ger_rank4);
+}
+
+QEMU_FLATTEN
+void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                        ppc_acc_t *at, uint32_t mask)
+{
+    xviger(env, a, b, at, mask, true, true, ger_rank4);
+}
+
+QEMU_FLATTEN
+void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                      ppc_acc_t *at, uint32_t mask)
+{
+    xviger(env, a, b, at, mask, false, false, ger_rank2);
+}
+
+QEMU_FLATTEN
+void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                       ppc_acc_t *at, uint32_t mask)
+{
+    xviger(env, a, b, at, mask, true, false, ger_rank2);
+}
+
+QEMU_FLATTEN
+void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                        ppc_acc_t *at, uint32_t mask)
+{
+    xviger(env, a, b, at, mask, false, true, ger_rank2);
+}
+
+QEMU_FLATTEN
+void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                         ppc_acc_t *at, uint32_t mask)
+{
+    xviger(env, a, b, at, mask, true, true, ger_rank2);
+}
+
 target_ulong helper_vclzlsbb(ppc_avr_t *r)
 {
     target_ulong count = 0;
diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index 8094e0b033..2add128cd1 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -18,6 +18,8 @@
 #ifndef PPC_INTERNAL_H
 #define PPC_INTERNAL_H
 
+#include "hw/registerfields.h"
+
 #define FUNC_MASK(name, ret_type, size, max_val)                  \
 static inline ret_type name(uint##size##_t start,                 \
                               uint##size##_t end)                 \
@@ -291,4 +293,17 @@ G_NORETURN void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
                                             uintptr_t retaddr);
 #endif
 
+FIELD(GER_MSK, XMSK, 0, 4)
+FIELD(GER_MSK, YMSK, 4, 4)
+FIELD(GER_MSK, PMSK, 8, 8)
+
+static inline int ger_pack_masks(int pmsk, int ymsk, int xmsk)
+{
+    int msk = 0;
+    msk = FIELD_DP32(msk, GER_MSK, XMSK, xmsk);
+    msk = FIELD_DP32(msk, GER_MSK, YMSK, ymsk);
+    msk = FIELD_DP32(msk, GER_MSK, PMSK, pmsk);
+    return msk;
+}
+
 #endif /* PPC_INTERNAL_H */
diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc
index dc8875d5d3..9d4309e841 100644
--- a/target/ppc/translate/vsx-impl.c.inc
+++ b/target/ppc/translate/vsx-impl.c.inc
@@ -17,6 +17,13 @@ static inline TCGv_ptr gen_vsr_ptr(int reg)
     return r;
 }
 
+static inline TCGv_ptr gen_acc_ptr(int reg)
+{
+    TCGv_ptr r = tcg_temp_new_ptr();
+    tcg_gen_addi_ptr(r, cpu_env, acc_full_offset(reg));
+    return r;
+}
+
 #define VSX_LOAD_SCALAR(name, operation)                      \
 static void gen_##name(DisasContext *ctx)                     \
 {                                                             \
@@ -2818,6 +2825,40 @@ static bool trans_XXSETACCZ(DisasContext *ctx, arg_X_a *a)
     return true;
 }
 
+static bool do_ger(DisasContext *ctx, arg_MMIRR_XX3 *a,
+                   void (*helper)(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32))
+{
+    uint32_t mask;
+    TCGv_ptr xt, xa, xb;
+    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+    REQUIRE_VSX(ctx);
+    if (unlikely((a->xa / 4 == a->xt) || (a->xb / 4 == a->xt))) {
+        gen_invalid(ctx);
+        return true;
+    }
+
+    xt = gen_acc_ptr(a->xt);
+    xa = gen_vsr_ptr(a->xa);
+    xb = gen_vsr_ptr(a->xb);
+
+    mask = ger_pack_masks(a->pmsk, a->ymsk, a->xmsk);
+    helper(cpu_env, xa, xb, xt, tcg_constant_i32(mask));
+    tcg_temp_free_ptr(xt);
+    tcg_temp_free_ptr(xa);
+    tcg_temp_free_ptr(xb);
+    return true;
+}
+
+TRANS(XVI4GER8, do_ger, gen_helper_XVI4GER8)
+TRANS(XVI4GER8PP, do_ger,  gen_helper_XVI4GER8PP)
+TRANS(XVI8GER4, do_ger, gen_helper_XVI8GER4)
+TRANS(XVI8GER4PP, do_ger,  gen_helper_XVI8GER4PP)
+TRANS(XVI8GER4SPP, do_ger, gen_helper_XVI8GER4SPP)
+TRANS(XVI16GER2, do_ger, gen_helper_XVI16GER2)
+TRANS(XVI16GER2PP, do_ger, gen_helper_XVI16GER2PP)
+TRANS(XVI16GER2S, do_ger, gen_helper_XVI16GER2S)
+TRANS(XVI16GER2SPP, do_ger, gen_helper_XVI16GER2SPP)
+
 #undef GEN_XX2FORM
 #undef GEN_XX3FORM
 #undef GEN_XX2IFORM
-- 
2.31.1



  parent reply	other threads:[~2022-05-20 13:54 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-20 13:51 [PATCH v4 0/8] VSX MMA Implementation Lucas Mateus Castro(alqotel)
2022-05-20 13:51 ` [PATCH v4 1/8] target/ppc: Implement xxm[tf]acc and xxsetaccz Lucas Mateus Castro(alqotel)
2022-05-20 13:51 ` Lucas Mateus Castro(alqotel) [this message]
2022-05-20 13:51 ` [PATCH v4 3/8] target/ppc: Implemented pmxvi*ger* instructions Lucas Mateus Castro(alqotel)
2022-05-20 13:51 ` [PATCH v4 4/8] target/ppc: Implemented xvf*ger* Lucas Mateus Castro(alqotel)
2022-05-20 13:51 ` [PATCH v4 5/8] target/ppc: Implemented xvf16ger* Lucas Mateus Castro(alqotel)
2022-05-20 15:47   ` Richard Henderson
2022-05-20 16:42     ` Lucas Mateus Martins Araujo e Castro
2022-05-20 19:03       ` Richard Henderson
2022-05-20 13:51 ` [PATCH v4 6/8] target/ppc: Implemented pmxvf*ger* Lucas Mateus Castro(alqotel)
2022-05-20 13:51 ` [PATCH v4 7/8] target/ppc: Implemented [pm]xvbf16ger2* Lucas Mateus Castro(alqotel)
2022-05-20 13:51 ` [PATCH v4 8/8] linux-user: Add PowerPC ISA 3.1 and MMA to hwcap Lucas Mateus Castro(alqotel)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220520135129.63664-3-lucas.araujo@eldorado.org.br \
    --to=lucas.araujo@eldorado.org.br \
    --cc=clg@kaod.org \
    --cc=danielhb413@gmail.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=groug@kaod.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.