From: "Lucas Mateus Castro(alqotel)" <lucas.araujo@eldorado.org.br>
To: qemu-ppc@nongnu.org
Cc: "Daniel Henrique Barboza" <danielhb413@gmail.com>,
richard.henderson@linaro.org, "Greg Kurz" <groug@kaod.org>,
"open list:All patches CC here" <qemu-devel@nongnu.org>,
"Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>,
"Cédric Le Goater" <clg@kaod.org>,
"David Gibson" <david@gibson.dropbear.id.au>
Subject: [RFC PATCH 2/7] target/ppc: Implemented xvi*ger* instructions
Date: Tue, 26 Apr 2022 09:50:23 -0300 [thread overview]
Message-ID: <20220426125028.18844-3-lucas.araujo@eldorado.org.br> (raw)
In-Reply-To: <20220426125028.18844-1-lucas.araujo@eldorado.org.br>
From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
Implement the following PowerISA v3.1 instructions:
xvi4ger8: VSX Vector 4-bit Signed Integer GER (rank-8 update)
xvi4ger8pp: VSX Vector 4-bit Signed Integer GER (rank-8 update)
Positive multiply, Positive accumulate
xvi8ger4: VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update)
xvi8ger4pp: VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update)
Positive multiply, Positive accumulate
xvi8ger4spp: VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update)
with Saturate Positive multiply, Positive accumulate
xvi16ger2: VSX Vector 16-bit Signed Integer GER (rank-2 update)
xvi16ger2pp: VSX Vector 16-bit Signed Integer GER (rank-2 update)
Positive multiply, Positive accumulate
xvi16ger2s: VSX Vector 16-bit Signed Integer GER (rank-2 update)
with Saturation
xvi16ger2spp: VSX Vector 16-bit Signed Integer GER (rank-2 update)
with Saturation Positive multiply, Positive accumulate
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
---
target/ppc/cpu.h | 5 ++
target/ppc/helper.h | 3 +
target/ppc/insn32.decode | 15 +++++
target/ppc/int_helper.c | 85 +++++++++++++++++++++++++++++
target/ppc/internal.h | 28 ++++++++++
target/ppc/translate/vsx-impl.c.inc | 50 +++++++++++++++++
6 files changed, 186 insertions(+)
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index c2b6c987c0..ee55c6cfa2 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -2688,6 +2688,11 @@ static inline uint64_t *cpu_vsrl_ptr(CPUPPCState *env, int i)
return (uint64_t *)((uintptr_t)env + vsr64_offset(i, false));
}
+/* Return a pointer to VSR number i inside env, located via vsr_full_offset(). */
+static inline ppc_vsr_t *cpu_vsr_ptr(CPUPPCState *env, int i)
+{
+    uintptr_t base = (uintptr_t)env;
+
+    return (ppc_vsr_t *)(base + vsr_full_offset(i));
+}
+
static inline long avr64_offset(int i, bool high)
{
return vsr64_offset(i + 32, high);
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index aa6773c4a5..06553517de 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -537,6 +537,9 @@ DEF_HELPER_5(XXBLENDVB, void, vsr, vsr, vsr, vsr, i32)
DEF_HELPER_5(XXBLENDVH, void, vsr, vsr, vsr, vsr, i32)
DEF_HELPER_5(XXBLENDVW, void, vsr, vsr, vsr, vsr, i32)
DEF_HELPER_5(XXBLENDVD, void, vsr, vsr, vsr, vsr, i32)
+/* VSX integer GER helpers: (env, xa, xb, at, packed masks, packed flags) */
+DEF_HELPER_6(XVI4GER8, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(XVI8GER4, void, env, i32, i32, i32, i32, i32)
+DEF_HELPER_6(XVI16GER2, void, env, i32, i32, i32, i32, i32)
DEF_HELPER_2(efscfsi, i32, env, i32)
DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 7a76bedfa6..653f50db93 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -170,6 +170,9 @@
&XX3 xt xa xb
@XX3 ...... ..... ..... ..... ........ ... &XX3 xt=%xx_xt xa=%xx_xa xb=%xx_xb
+# 3-bit field at bit 23, passed through times_4 before being stored in xt
+%xx_at 23:3 !function=times_4
+@XX3_at ...... ... .. ..... ..... ........ ... &XX3 xt=%xx_at xb=%xx_xb
+
&XX3_dm xt xa xb dm
@XX3_dm ...... ..... ..... ..... . dm:2 ..... ... &XX3_dm xt=%xx_xt xa=%xx_xa xb=%xx_xb
@@ -719,3 +722,15 @@ RFEBB 010011-------------- . 0010010010 - @XL_s
XXMFACC 011111 ... -- 00000 ----- 0010110001 - @X_a
XXMTACC 011111 ... -- 00001 ----- 0010110001 - @X_a
XXSETACCZ 011111 ... -- 00011 ----- 0010110001 - @X_a
+
+## VSX Vector Integer GER instructions (target selected through @XX3_at)
+
+XVI4GER8 111011 ... -- ..... ..... 00100011 ..- @XX3_at xa=%xx_xa
+XVI4GER8PP 111011 ... -- ..... ..... 00100010 ..- @XX3_at xa=%xx_xa
+XVI8GER4 111011 ... -- ..... ..... 00000011 ..- @XX3_at xa=%xx_xa
+XVI8GER4PP 111011 ... -- ..... ..... 00000010 ..- @XX3_at xa=%xx_xa
+XVI16GER2 111011 ... -- ..... ..... 01001011 ..- @XX3_at xa=%xx_xa
+XVI16GER2PP 111011 ... -- ..... ..... 01101011 ..- @XX3_at xa=%xx_xa
+XVI8GER4SPP 111011 ... -- ..... ..... 01100011 ..- @XX3_at xa=%xx_xa
+XVI16GER2S 111011 ... -- ..... ..... 00101011 ..- @XX3_at xa=%xx_xa
+XVI16GER2SPP 111011 ... -- ..... ..... 00101010 ..- @XX3_at xa=%xx_xa
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 8c1674510b..bd2f1a7c2a 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -782,6 +782,91 @@ VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT
+/*
+ * Packed VSX Integer GER flags, two bits wide:
+ *   00 - neither accumulate nor saturate
+ *   01 - accumulate, do not saturate
+ *   10 - saturate, do not accumulate
+ *   11 - accumulate and saturate
+ */
+
+/* Return true when the saturation bit (bit 1) of flags is set. */
+static inline bool get_sat(uint32_t flags)
+{
+    const uint32_t sat_bit = 0x2;
+
+    return (flags & sat_bit) != 0;
+}
+
+/* Return true when the accumulate bit (bit 0) of flags is set. */
+static inline bool get_acc(uint32_t flags)
+{
+    const uint32_t acc_bit = 0x1;
+
+    return (flags & acc_bit) != 0;
+}
+
+/*
+ * Element accessors used by XVIGER. 'a' is a ppc_vsr_t pointer and 'i' an
+ * element index; the suffix selects the element kind (N = 4-bit nibble,
+ * B = VsrB element, H = VsrH element) and the 'S' variants read the element
+ * as a signed value.
+ *
+ * Nibbles are numbered most significant first, like the VsrB() bytes that
+ * hold them: an even index selects the high nibble (bits 4..7) of byte
+ * i / 2 and an odd index the low nibble (bits 0..3). XVIGER walks pmsk from
+ * its most significant bit, so this keeps nibble k paired with pmsk bit k.
+ */
+#define GET_VsrN(a, i) (extract32((a)->VsrB((i) / 2), (i) % 2 ? 0 : 4, 4))
+#define GET_VsrB(a, i) ((a)->VsrB(i))
+#define GET_VsrH(a, i) ((a)->VsrH(i))
+
+#define GET_VsrSN(a, i) (sextract32((a)->VsrSB((i) / 2), (i) % 2 ? 0 : 4, 4))
+#define GET_VsrSB(a, i) ((a)->VsrSB(i))
+#define GET_VsrSH(a, i) ((a)->VsrSH(i))
+
+/*
+ * XVIGER(NAME, RANK, EL) defines the helper NAME for one integer GER family.
+ *
+ * a_r and b_r are the source VSR numbers, at_r the first of the four target
+ * VSRs (at_r + 0 .. at_r + 3), mask the packed xmsk/ymsk/pmsk value and
+ * packed_flags the accumulate/saturate flags.
+ *
+ * For every row i (target VSR at_r + i) and column j (signed word j of that
+ * VSR) enabled by xmsk and ymsk, the helper sums the products of elements
+ * RANK * i + k of a and RANK * j + k of b for each k enabled by pmsk. The
+ * masks are walked from their most significant bit. Elements of a are
+ * always read signed; elements of b are read unsigned when RANK == 4 and
+ * signed otherwise.
+ *
+ * With the accumulate flag the previous word value is added to the sum.
+ * With the saturate flag a sum outside the int32_t range is clamped to
+ * INT32_MAX / INT32_MIN and set_vscr_sat() is called; otherwise the sum is
+ * truncated to 32 bits. Words disabled by xmsk/ymsk are written as 0.
+ */
+#define XVIGER(NAME, RANK, EL) \
+ void NAME(CPUPPCState *env, uint32_t a_r, uint32_t b_r, \
+ uint32_t at_r, uint32_t mask, uint32_t packed_flags) \
+ { \
+ ppc_vsr_t *a = cpu_vsr_ptr(env, a_r), *b = cpu_vsr_ptr(env, b_r), *at; \
+ bool sat = get_sat(packed_flags), acc = get_acc(packed_flags); \
+ uint8_t pmsk = ger_get_pmsk(mask), xmsk = ger_get_xmsk(mask), \
+ ymsk = ger_get_ymsk(mask); \
+ uint8_t pmsk_bit, xmsk_bit, ymsk_bit; \
+ int64_t psum; \
+ int32_t va, vb; \
+ int i, j, k; \
+ for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) { \
+ at = cpu_vsr_ptr(env, at_r + i); \
+ for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) { \
+ if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) { \
+ psum = 0; \
+ for (k = 0, pmsk_bit = 1 << (RANK - 1); k < RANK; \
+ k++, pmsk_bit >>= 1) { \
+ if (pmsk_bit & pmsk) { \
+ va = (int32_t)GET_VsrS##EL(a, RANK * i + k); \
+ vb = (int32_t) ((RANK == 4) ? \
+ GET_Vsr##EL(b, RANK * j + k) : \
+ GET_VsrS##EL(b, RANK * j + k));\
+ psum += va * vb; \
+ } \
+ } \
+ if (acc) { \
+ psum += at->VsrSW(j); \
+ } \
+ if (sat && psum > INT32_MAX) { \
+ set_vscr_sat(env); \
+ at->VsrSW(j) = INT32_MAX; \
+ } else if (sat && psum < INT32_MIN) { \
+ set_vscr_sat(env); \
+ at->VsrSW(j) = INT32_MIN; \
+ } else { \
+ at->VsrSW(j) = (int32_t) psum; \
+ } \
+ } else { \
+ at->VsrSW(j) = 0; \
+ } \
+ } \
+ } \
+ }
+
+/*
+ * Instantiate one helper per element width: 4-bit nibbles (rank 8),
+ * bytes (rank 4) and halfwords (rank 2).
+ */
+XVIGER(helper_XVI4GER8, 8, N)
+XVIGER(helper_XVI8GER4, 4, B)
+XVIGER(helper_XVI16GER2, 2, H)
+
+/* Drop the generator and its accessor macros once the helpers exist. */
+#undef XVIGER
+#undef GET_VsrN
+#undef GET_VsrB
+#undef GET_VsrH
+#undef GET_VsrSN
+#undef GET_VsrSB
+#undef GET_VsrSH
+
target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
target_ulong count = 0;
diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index 8094e0b033..a994d98238 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -291,4 +291,32 @@ G_NORETURN void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
uintptr_t retaddr);
#endif
+/*
+ * Helpers to pack and unpack the masks of the GER instructions.
+ *
+ * Layout of the packed value:
+ *   bits 0-3   xmsk
+ *   bits 4-7   ymsk
+ *   bits 8-15  pmsk
+ */
+
+/* Extract xmsk (bits 0-3) from a packed mask value. */
+static inline uint8_t ger_get_xmsk(uint32_t packed_masks)
+{
+    const uint32_t xmsk = packed_masks & 0xF;
+
+    return xmsk;
+}
+
+/* Extract ymsk (bits 4-7) from a packed mask value. */
+static inline uint8_t ger_get_ymsk(uint32_t packed_masks)
+{
+    const uint32_t ymsk = (packed_masks & 0xF0) >> 4;
+
+    return ymsk;
+}
+
+/* Extract pmsk (bits 8-15) from a packed mask value. */
+static inline uint8_t ger_get_pmsk(uint32_t packed_masks)
+{
+    const uint32_t pmsk = (packed_masks & 0xFF00) >> 8;
+
+    return pmsk;
+}
+
+/*
+ * Combine the three masks into one packed value: pmsk in bits 8-15,
+ * ymsk in bits 4-7 and xmsk in bits 0-3. Excess bits of each argument
+ * are discarded.
+ */
+static inline int ger_pack_masks(int pmsk, int ymsk, int xmsk)
+{
+    int p_part = (pmsk & 0xFF) << 8;
+    int y_part = (ymsk & 0xF) << 4;
+    int x_part = xmsk & 0xF;
+
+    return p_part | y_part | x_part;
+}
+
#endif /* PPC_INTERNAL_H */
diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc
index 919b889c40..1eb68c7081 100644
--- a/target/ppc/translate/vsx-impl.c.inc
+++ b/target/ppc/translate/vsx-impl.c.inc
@@ -2823,6 +2823,56 @@ static bool trans_XXSETACCZ(DisasContext *ctx, arg_X_a *a)
return true;
}
+/*
+ * Build the packed VSX Integer GER flags: bit 0 carries acc, bit 1 sat.
+ *   00 - neither accumulate nor saturate
+ *   01 - accumulate, do not saturate
+ *   10 - saturate, do not accumulate
+ *   11 - accumulate and saturate
+ */
+static uint32_t pack_flags_xvi(int acc, int sat)
+{
+    int sat_part = sat << 1;
+
+    return sat_part | acc;
+}
+
+/*
+ * Translate one VSX integer GER instruction in the XX3 form.
+ *
+ * op is the packed accumulate/saturate flag value handed to the helper and
+ * helper is the generator for the matching helper call. The instruction is
+ * treated as invalid when either source VSR falls into the same group of
+ * four VSRs as the target. Returns true once the instruction was handled.
+ */
+static bool do_ger_XX3(DisasContext *ctx, arg_XX3 *a, uint32_t op,
+                       void (*helper)(TCGv_env, TCGv_i32, TCGv_i32,
+                                      TCGv_i32, TCGv_i32, TCGv_i32))
+{
+    uint32_t mask;
+    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+    REQUIRE_VSX(ctx);
+    if (unlikely((a->xa / 4 == a->xt / 4) || (a->xb / 4 == a->xt / 4))) {
+        gen_invalid(ctx);
+        return true;
+    }
+
+    /*
+     * This form carries no mask fields, so enable every row, column and
+     * product. Go through ger_pack_masks() so the value follows the packed
+     * format the helpers unpack instead of relying on an all-ones constant.
+     */
+    mask = ger_pack_masks(0xFF, 0xF, 0xF);
+    helper(cpu_env, tcg_constant_i32(a->xa), tcg_constant_i32(a->xb),
+           tcg_constant_i32(a->xt), tcg_constant_i32(mask),
+           tcg_constant_i32(op));
+    return true;
+}
+
+/*
+ * Short names for the four packed flag combinations, so that each TRANS()
+ * line below stays under 80 columns. They are undefined again right after
+ * the last use.
+ */
+#define GER_NOP pack_flags_xvi(0, 0)
+#define GER_PP pack_flags_xvi(1, 0)
+#define GER_SAT pack_flags_xvi(0, 1)
+#define GER_SPP pack_flags_xvi(1, 1)
+TRANS(XVI4GER8, do_ger_XX3, GER_NOP, gen_helper_XVI4GER8)
+TRANS(XVI4GER8PP, do_ger_XX3, GER_PP, gen_helper_XVI4GER8)
+TRANS(XVI8GER4, do_ger_XX3, GER_NOP, gen_helper_XVI8GER4)
+TRANS(XVI8GER4PP, do_ger_XX3, GER_PP, gen_helper_XVI8GER4)
+TRANS(XVI8GER4SPP, do_ger_XX3, GER_SPP, gen_helper_XVI8GER4)
+TRANS(XVI16GER2, do_ger_XX3, GER_NOP, gen_helper_XVI16GER2)
+TRANS(XVI16GER2PP, do_ger_XX3, GER_PP, gen_helper_XVI16GER2)
+TRANS(XVI16GER2S, do_ger_XX3, GER_SAT, gen_helper_XVI16GER2)
+TRANS(XVI16GER2SPP, do_ger_XX3, GER_SPP, gen_helper_XVI16GER2)
+#undef GER_NOP
+#undef GER_PP
+#undef GER_SAT
+#undef GER_SPP
+
#undef GEN_XX2FORM
#undef GEN_XX3FORM
#undef GEN_XX2IFORM
--
2.31.1
next prev parent reply other threads:[~2022-04-26 13:06 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-04-26 12:50 [RFC PATCH 0/7] VSX MMA Implementation Lucas Mateus Castro(alqotel)
2022-04-26 12:50 ` [RFC PATCH 1/7] target/ppc: Implement xxm[tf]acc and xxsetaccz Lucas Mateus Castro(alqotel)
2022-04-26 22:59 ` Richard Henderson
2022-04-26 12:50 ` Lucas Mateus Castro(alqotel) [this message]
2022-04-26 23:40 ` [RFC PATCH 2/7] target/ppc: Implemented xvi*ger* instructions Richard Henderson
2022-04-27 20:24 ` Lucas Mateus Martins Araujo e Castro
2022-04-27 22:28 ` Richard Henderson
2022-04-26 12:50 ` [RFC PATCH 3/7] target/ppc: Implemented pmxvi*ger* instructions Lucas Mateus Castro(alqotel)
2022-04-26 12:50 ` [RFC PATCH 4/7] target/ppc: Implemented xvf*ger* Lucas Mateus Castro(alqotel)
2022-04-27 0:09 ` Richard Henderson
2022-04-26 12:50 ` [RFC PATCH 5/7] target/ppc: Implemented xvf16ger* Lucas Mateus Castro(alqotel)
2022-04-27 0:26 ` Richard Henderson
2022-04-27 21:11 ` Lucas Mateus Martins Araujo e Castro
2022-04-27 22:30 ` Richard Henderson
2022-04-26 12:50 ` [RFC PATCH 6/7] target/ppc: Implemented pmxvf*ger* Lucas Mateus Castro(alqotel)
2022-04-27 0:33 ` Richard Henderson
2022-04-26 12:50 ` [RFC PATCH 7/7] target/ppc: Implemented [pm]xvbf16ger2* Lucas Mateus Castro(alqotel)
2022-04-27 6:21 ` [RFC PATCH 0/7] VSX MMA Implementation Joel Stanley
2022-04-27 7:10 ` Cédric Le Goater
2022-05-05 6:06 ` Joel Stanley
2022-04-28 14:05 ` Lucas Mateus Martins Araujo e Castro
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220426125028.18844-3-lucas.araujo@eldorado.org.br \
--to=lucas.araujo@eldorado.org.br \
--cc=clg@kaod.org \
--cc=danielhb413@gmail.com \
--cc=david@gibson.dropbear.id.au \
--cc=groug@kaod.org \
--cc=qemu-devel@nongnu.org \
--cc=qemu-ppc@nongnu.org \
--cc=richard.henderson@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).