All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Lucas Mateus Castro(alqotel)" <lucas.araujo@eldorado.org.br>
To: qemu-ppc@nongnu.org
Cc: "Daniel Henrique Barboza" <danielhb413@gmail.com>,
	richard.henderson@linaro.org, "Greg Kurz" <groug@kaod.org>,
	"open list:All patches CC here" <qemu-devel@nongnu.org>,
	"Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>,
	"Cédric Le Goater" <clg@kaod.org>,
	"David Gibson" <david@gibson.dropbear.id.au>
Subject: [RFC PATCH 2/7] target/ppc: Implemented xvi*ger* instructions
Date: Tue, 26 Apr 2022 09:50:23 -0300	[thread overview]
Message-ID: <20220426125028.18844-3-lucas.araujo@eldorado.org.br> (raw)
In-Reply-To: <20220426125028.18844-1-lucas.araujo@eldorado.org.br>

From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>

Implement the following PowerISA v3.1 instructions:
xvi4ger8:     VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update)
xvi4ger8pp:   VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update)
Positive multiply, Positive accumulate
xvi8ger4:     VSX Vector 4-bit Signed Integer GER (rank-8 update)
xvi8ger4pp:   VSX Vector 4-bit Signed Integer GER (rank-8 update)
Positive multiply, Positive accumulate
xvi8ger4spp:  VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update)
with Saturate Positive multiply, Positive accumulate
xvi16ger2:    VSX Vector 16-bit Signed Integer GER (rank-2 update)
xvi16ger2pp:  VSX Vector 16-bit Signed Integer GER (rank-2 update)
Positive multiply, Positive accumulate
xvi16ger2s:   VSX Vector 16-bit Signed Integer GER (rank-2 update)
with Saturation
xvi16ger2spp: VSX Vector 16-bit Signed Integer GER (rank-2 update)
with Saturation Positive multiply, Positive accumulate

Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
---
 target/ppc/cpu.h                    |  5 ++
 target/ppc/helper.h                 |  3 +
 target/ppc/insn32.decode            | 15 +++++
 target/ppc/int_helper.c             | 85 +++++++++++++++++++++++++++++
 target/ppc/internal.h               | 28 ++++++++++
 target/ppc/translate/vsx-impl.c.inc | 50 +++++++++++++++++
 6 files changed, 186 insertions(+)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index c2b6c987c0..ee55c6cfa2 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -2688,6 +2688,11 @@ static inline uint64_t *cpu_vsrl_ptr(CPUPPCState *env, int i)
     return (uint64_t *)((uintptr_t)env + vsr64_offset(i, false));
 }
 
+static inline ppc_vsr_t *cpu_vsr_ptr(CPUPPCState *env, int i)
+{
+    return (ppc_vsr_t *)((uintptr_t)env + vsr_full_offset(i));
+}
+
 static inline long avr64_offset(int i, bool high)
 {
     return vsr64_offset(i + 32, high);
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index aa6773c4a5..06553517de 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -537,6 +537,9 @@ DEF_HELPER_5(XXBLENDVB, void, vsr, vsr, vsr, vsr, i32)
 DEF_HELPER_5(XXBLENDVH, void, vsr, vsr, vsr, vsr, i32)
 DEF_HELPER_5(XXBLENDVW, void, vsr, vsr, vsr, vsr, i32)
 DEF_HELPER_5(XXBLENDVD, void, vsr, vsr, vsr, vsr, i32)
+DEF_HELPER_6(XVI4GER8, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(XVI8GER4, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(XVI16GER2, void, env, i32, i32, i32, i32, i32)
 
 DEF_HELPER_2(efscfsi, i32, env, i32)
 DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 7a76bedfa6..653f50db93 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -170,6 +170,9 @@
 &XX3            xt xa xb
 @XX3            ...... ..... ..... ..... ........ ...           &XX3 xt=%xx_xt xa=%xx_xa xb=%xx_xb
 
+%xx_at          23:3 !function=times_4
+@XX3_at         ...... ... .. ..... ..... ........ ...          &XX3 xt=%xx_at xb=%xx_xb
+
 &XX3_dm         xt xa xb dm
 @XX3_dm         ...... ..... ..... ..... . dm:2 ..... ...       &XX3_dm xt=%xx_xt xa=%xx_xa xb=%xx_xb
 
@@ -719,3 +722,15 @@ RFEBB           010011-------------- .   0010010010 -   @XL_s
 XXMFACC         011111 ... -- 00000 ----- 0010110001 -   @X_a
 XXMTACC         011111 ... -- 00001 ----- 0010110001 -   @X_a
 XXSETACCZ       011111 ... -- 00011 ----- 0010110001 -   @X_a
+
+## Vector GER instruction
+
+XVI4GER8        111011 ... -- ..... ..... 00100011 ..-  @XX3_at xa=%xx_xa
+XVI4GER8PP      111011 ... -- ..... ..... 00100010 ..-  @XX3_at xa=%xx_xa
+XVI8GER4        111011 ... -- ..... ..... 00000011 ..-  @XX3_at xa=%xx_xa
+XVI8GER4PP      111011 ... -- ..... ..... 00000010 ..-  @XX3_at xa=%xx_xa
+XVI16GER2       111011 ... -- ..... ..... 01001011 ..-  @XX3_at xa=%xx_xa
+XVI16GER2PP     111011 ... -- ..... ..... 01101011 ..-  @XX3_at xa=%xx_xa
+XVI8GER4SPP     111011 ... -- ..... ..... 01100011 ..-  @XX3_at xa=%xx_xa
+XVI16GER2S      111011 ... -- ..... ..... 00101011 ..-  @XX3_at xa=%xx_xa
+XVI16GER2SPP    111011 ... -- ..... ..... 00101010 ..-  @XX3_at xa=%xx_xa
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 8c1674510b..bd2f1a7c2a 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -782,6 +782,91 @@ VCT(uxs, cvtsduw, u32)
 VCT(sxs, cvtsdsw, s32)
 #undef VCT
 
+/*
+ * Packed VSX Integer GER Flags
+ * 00 - no accumulation no saturation
+ * 01 - accumulate but no saturation
+ * 10 - no accumulation but with saturation
+ * 11 - accumulate with saturation
+ */
+static inline bool get_sat(uint32_t flags)
+{
+    return flags & 0x2;
+}
+
+static inline bool get_acc(uint32_t flags)
+{
+    return flags & 0x1;
+}
+
+#define GET_VsrN(a, i) (extract32(a->VsrB((i) / 2), (i) % 2 ? 4 : 0, 4))
+#define GET_VsrB(a, i) a->VsrB(i)
+#define GET_VsrH(a, i) a->VsrH(i)
+
+#define GET_VsrSN(a, i) (sextract32(a->VsrSB((i) / 2), (i) % 2 ? 4 : 0, 4))
+#define GET_VsrSB(a, i) a->VsrSB(i)
+#define GET_VsrSH(a, i) a->VsrSH(i)
+
+#define XVIGER(NAME, RANK, EL)                                                 \
+    void NAME(CPUPPCState *env, uint32_t a_r, uint32_t b_r,                    \
+              uint32_t  at_r, uint32_t mask, uint32_t packed_flags)            \
+    {                                                                          \
+        ppc_vsr_t *a = cpu_vsr_ptr(env, a_r), *b = cpu_vsr_ptr(env, b_r), *at; \
+        bool sat = get_sat(packed_flags), acc = get_acc(packed_flags);         \
+        uint8_t pmsk = ger_get_pmsk(mask), xmsk = ger_get_xmsk(mask),          \
+                ymsk = ger_get_ymsk(mask);                                     \
+        uint8_t pmsk_bit, xmsk_bit, ymsk_bit;                                  \
+        int64_t psum;                                                          \
+        int32_t va, vb;                                                        \
+        int i, j, k;                                                           \
+        for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {           \
+            at = cpu_vsr_ptr(env, at_r + i);                                   \
+            for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {       \
+                if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {                  \
+                    psum = 0;                                                  \
+                    for (k = 0, pmsk_bit = 1 << (RANK - 1); k < RANK;          \
+                         k++, pmsk_bit >>= 1) {                                \
+                        if (pmsk_bit & pmsk) {                                 \
+                            va = (int32_t)GET_VsrS##EL(a, RANK * i + k);       \
+                            vb = (int32_t) ((RANK == 4) ?                      \
+                                                GET_Vsr##EL(b, RANK * j + k) : \
+                                                GET_VsrS##EL(b, RANK * j + k));\
+                            psum += va * vb;                                   \
+                        }                                                      \
+                    }                                                          \
+                    if (acc) {                                                 \
+                        psum += at->VsrSW(j);                                  \
+                    }                                                          \
+                    if (sat && psum > INT32_MAX) {                             \
+                        set_vscr_sat(env);                                     \
+                        at->VsrSW(j) = INT32_MAX;                              \
+                    } else if (sat && psum < INT32_MIN) {                      \
+                        set_vscr_sat(env);                                     \
+                        at->VsrSW(j) = INT32_MIN;                              \
+                    } else {                                                   \
+                        at->VsrSW(j) = (int32_t) psum;                         \
+                    }                                                          \
+                } else {                                                       \
+                    at->VsrSW(j) = 0;                                          \
+                }                                                              \
+            }                                                                  \
+        }                                                                      \
+    }
+
+XVIGER(helper_XVI4GER8, 8, N)
+XVIGER(helper_XVI8GER4, 4, B)
+XVIGER(helper_XVI16GER2, 2, H)
+
+#undef GER_MULT
+#undef XVIGER_NAME
+#undef XVIGER
+#undef GET_VsrN
+#undef GET_VsrB
+#undef GET_VsrH
+#undef GET_VsrSN
+#undef GET_VsrSB
+#undef GET_VsrSH
+
 target_ulong helper_vclzlsbb(ppc_avr_t *r)
 {
     target_ulong count = 0;
diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index 8094e0b033..a994d98238 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -291,4 +291,32 @@ G_NORETURN void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
                                             uintptr_t retaddr);
 #endif
 
+/*
+ * Auxiliary functions to pack/unpack masks for GER instructions.
+ *
+ * Packed format:
+ *  Bits 0-3: xmsk
+ *  Bits 4-7: ymsk
+ *  Bits 8-15: pmsk
+ */
+static inline uint8_t ger_get_xmsk(uint32_t packed_masks)
+{
+    return packed_masks & 0xF;
+}
+
+static inline uint8_t ger_get_ymsk(uint32_t packed_masks)
+{
+    return (packed_masks >> 4) & 0xF;
+}
+
+static inline uint8_t ger_get_pmsk(uint32_t packed_masks)
+{
+    return (packed_masks >> 8) & 0xFF;
+}
+
+static inline int ger_pack_masks(int pmsk, int ymsk, int xmsk)
+{
+    return (pmsk & 0xFF) << 8 | (ymsk & 0xF) << 4 | (xmsk & 0xF);
+}
+
 #endif /* PPC_INTERNAL_H */
diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc
index 919b889c40..1eb68c7081 100644
--- a/target/ppc/translate/vsx-impl.c.inc
+++ b/target/ppc/translate/vsx-impl.c.inc
@@ -2823,6 +2823,56 @@ static bool trans_XXSETACCZ(DisasContext *ctx, arg_X_a *a)
     return true;
 }
 
+/*
+ * Packed VSX Integer GER Flags
+ * 00 - no accumulation no saturation
+ * 01 - accumulate but no saturation
+ * 10 - no accumulation but with saturation
+ * 11 - accumulate with saturation
+ */
+static uint32_t pack_flags_xvi(int acc, int sat)
+{
+    return (sat << 1) | acc;
+}
+
+static bool do_ger_XX3(DisasContext *ctx, arg_XX3 *a, uint32_t op,
+                             void (*helper)(TCGv_env, TCGv_i32, TCGv_i32,
+                                            TCGv_i32, TCGv_i32, TCGv_i32))
+{
+    uint32_t mask;
+    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+    REQUIRE_VSX(ctx);
+    if (unlikely((a->xa / 4 == a->xt / 4) || (a->xb / 4 == a->xt / 4))) {
+        gen_invalid(ctx);
+        return true;
+    }
+
+    mask = 0xFFFFFFFF;
+    helper(cpu_env, tcg_constant_i32(a->xa), tcg_constant_i32(a->xb),
+           tcg_constant_i32(a->xt), tcg_constant_i32(mask),
+           tcg_constant_i32(op));
+    return true;
+}
+
+/* Used to keep line length < 80 */
+#define GER_NOP pack_flags_xvi(0, 0)
+#define GER_PP  pack_flags_xvi(1, 0)
+#define GER_SAT pack_flags_xvi(0, 1)
+#define GER_SPP pack_flags_xvi(1, 1)
+TRANS(XVI4GER8, do_ger_XX3, GER_NOP, gen_helper_XVI4GER8)
+TRANS(XVI4GER8PP, do_ger_XX3, GER_PP, gen_helper_XVI4GER8)
+TRANS(XVI8GER4, do_ger_XX3, GER_NOP, gen_helper_XVI8GER4)
+TRANS(XVI8GER4PP, do_ger_XX3, GER_PP, gen_helper_XVI8GER4)
+TRANS(XVI8GER4SPP, do_ger_XX3, GER_SPP, gen_helper_XVI8GER4)
+TRANS(XVI16GER2, do_ger_XX3, GER_NOP, gen_helper_XVI16GER2)
+TRANS(XVI16GER2PP, do_ger_XX3, GER_PP, gen_helper_XVI16GER2)
+TRANS(XVI16GER2S, do_ger_XX3, GER_SAT, gen_helper_XVI16GER2)
+TRANS(XVI16GER2SPP, do_ger_XX3, GER_SPP, gen_helper_XVI16GER2)
+#undef GER_NOP
+#undef GER_PP
+#undef GER_SAT
+#undef GER_SPP
+
 #undef GEN_XX2FORM
 #undef GEN_XX3FORM
 #undef GEN_XX2IFORM
-- 
2.31.1



  parent reply	other threads:[~2022-04-26 13:06 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-26 12:50 [RFC PATCH 0/7] VSX MMA Implementation Lucas Mateus Castro(alqotel)
2022-04-26 12:50 ` [RFC PATCH 1/7] target/ppc: Implement xxm[tf]acc and xxsetaccz Lucas Mateus Castro(alqotel)
2022-04-26 22:59   ` Richard Henderson
2022-04-26 12:50 ` Lucas Mateus Castro(alqotel) [this message]
2022-04-26 23:40   ` [RFC PATCH 2/7] target/ppc: Implemented xvi*ger* instructions Richard Henderson
2022-04-27 20:24     ` Lucas Mateus Martins Araujo e Castro
2022-04-27 22:28       ` Richard Henderson
2022-04-26 12:50 ` [RFC PATCH 3/7] target/ppc: Implemented pmxvi*ger* instructions Lucas Mateus Castro(alqotel)
2022-04-26 12:50 ` [RFC PATCH 4/7] target/ppc: Implemented xvf*ger* Lucas Mateus Castro(alqotel)
2022-04-27  0:09   ` Richard Henderson
2022-04-26 12:50 ` [RFC PATCH 5/7] target/ppc: Implemented xvf16ger* Lucas Mateus Castro(alqotel)
2022-04-27  0:26   ` Richard Henderson
2022-04-27 21:11     ` Lucas Mateus Martins Araujo e Castro
2022-04-27 22:30       ` Richard Henderson
2022-04-26 12:50 ` [RFC PATCH 6/7] target/ppc: Implemented pmxvf*ger* Lucas Mateus Castro(alqotel)
2022-04-27  0:33   ` Richard Henderson
2022-04-26 12:50 ` [RFC PATCH 7/7] target/ppc: Implemented [pm]xvbf16ger2* Lucas Mateus Castro(alqotel)
2022-04-27  6:21 ` [RFC PATCH 0/7] VSX MMA Implementation Joel Stanley
2022-04-27  7:10   ` Cédric Le Goater
2022-05-05  6:06     ` Joel Stanley
2022-04-28 14:05 ` Lucas Mateus Martins Araujo e Castro

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220426125028.18844-3-lucas.araujo@eldorado.org.br \
    --to=lucas.araujo@eldorado.org.br \
    --cc=clg@kaod.org \
    --cc=danielhb413@gmail.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=groug@kaod.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.