All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Lucas Mateus Castro(alqotel)" <lucas.araujo@eldorado.org.br>
To: qemu-ppc@nongnu.org
Cc: richard.henderson@linaro.org,
	"Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>,
	"Cédric Le Goater" <clg@kaod.org>,
	"Daniel Henrique Barboza" <danielhb413@gmail.com>,
	"David Gibson" <david@gibson.dropbear.id.au>,
	"Greg Kurz" <groug@kaod.org>,
	qemu-devel@nongnu.org (open list:All patches CC here)
Subject: [PATCH v3 4/8] target/ppc: Implemented xvf*ger*
Date: Tue, 17 May 2022 15:37:31 -0300	[thread overview]
Message-ID: <20220517183735.110342-5-lucas.araujo@eldorado.org.br> (raw)
In-Reply-To: <20220517183735.110342-1-lucas.araujo@eldorado.org.br>

From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>

Implement the following PowerISA v3.1 instructions:
xvf32ger:   VSX Vector 32-bit Floating-Point GER (rank-1 update)
xvf32gernn: VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative
multiply, Negative accumulate
xvf32gernp: VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative
multiply, Positive accumulate
xvf32gerpn: VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive
multiply, Negative accumulate
xvf32gerpp: VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive
multiply, Positive accumulate
xvf64ger:   VSX Vector 64-bit Floating-Point GER (rank-1 update)
xvf64gernn: VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative
multiply, Negative accumulate
xvf64gernp: VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative
multiply, Positive accumulate
xvf64gerpn: VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive
multiply, Negative accumulate
xvf64gerpp: VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive
multiply, Positive accumulate

Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
---
 target/ppc/cpu.h                    |   4 +
 target/ppc/fpu_helper.c             | 193 +++++++++++++++++++++++++++-
 target/ppc/helper.h                 |  10 ++
 target/ppc/insn32.decode            |  13 ++
 target/ppc/translate/vsx-impl.c.inc |  12 ++
 5 files changed, 230 insertions(+), 2 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index c8a12a3985..bdedf4138e 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -2641,6 +2641,8 @@ static inline bool lsw_reg_in_range(int start, int nregs, int rx)
 #define VsrSW(i) s32[i]
 #define VsrD(i) u64[i]
 #define VsrSD(i) s64[i]
+#define VsrSF(i) f32[i]
+#define VsrDF(i) f64[i]
 #else
 #define VsrB(i) u8[15 - (i)]
 #define VsrSB(i) s8[15 - (i)]
@@ -2650,6 +2652,8 @@ static inline bool lsw_reg_in_range(int start, int nregs, int rx)
 #define VsrSW(i) s32[3 - (i)]
 #define VsrD(i) u64[1 - (i)]
 #define VsrSD(i) s64[1 - (i)]
+#define VsrSF(i) f32[3 - (i)]
+#define VsrDF(i) f64[1 - (i)]
 #endif
 
 static inline int vsr64_offset(int i, bool high)
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 8592727792..8451de628b 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -414,7 +414,7 @@ void helper_store_fpscr(CPUPPCState *env, uint64_t val, uint32_t nibbles)
     ppc_store_fpscr(env, val);
 }
 
-void helper_fpscr_check_status(CPUPPCState *env)
+static void do_fpscr_check_status(CPUPPCState *env, uintptr_t raddr)
 {
     CPUState *cs = env_cpu(env);
     target_ulong fpscr = env->fpscr;
@@ -455,13 +455,19 @@ void helper_fpscr_check_status(CPUPPCState *env)
     }
     cs->exception_index = POWERPC_EXCP_PROGRAM;
     env->error_code = error | POWERPC_EXCP_FP;
+    env->fpscr |= error ? FP_FEX : 0;
     /* Deferred floating-point exception after target FPSCR update */
     if (fp_exceptions_enabled(env)) {
         raise_exception_err_ra(env, cs->exception_index,
-                               env->error_code, GETPC());
+                               env->error_code, raddr);
     }
 }
 
+void helper_fpscr_check_status(CPUPPCState *env)
+{
+    do_fpscr_check_status(env, GETPC());
+}
+
 static void do_float_check_status(CPUPPCState *env, bool change_fi,
                                   uintptr_t raddr)
 {
@@ -3469,3 +3475,186 @@ void helper_xssubqp(CPUPPCState *env, uint32_t opcode,
     *xt = t;
     do_float_check_status(env, true, GETPC());
 }
+
+static inline void vsxger_excp(CPUPPCState *env, uintptr_t retaddr)
+{
+    /*
+     * XV*GER instructions execute and set the FPSCR as if exceptions
+     * are disabled and only at the end throw an exception
+     */
+    target_ulong enable;
+    enable = env->fpscr & (FP_ENABLES | FP_FI | FP_FR);
+    env->fpscr &= ~(FP_ENABLES | FP_FI | FP_FR);
+    int status = get_float_exception_flags(&env->fp_status);
+    if (unlikely(status & float_flag_invalid)) {
+        if (status & float_flag_invalid_snan) {
+            float_invalid_op_vxsnan(env, 0);
+        }
+        if (status & float_flag_invalid_imz) {
+            float_invalid_op_vximz(env, false, 0);
+        }
+        if (status & float_flag_invalid_isi) {
+            float_invalid_op_vxisi(env, false, 0);
+        }
+    }
+    do_float_check_status(env, false, retaddr);
+    env->fpscr |= enable;
+    do_fpscr_check_status(env, retaddr);
+}
+
+typedef void vsxger_zero(ppc_vsr_t *at, int, int);
+
+typedef void vsxger_muladd_f(ppc_vsr_t *, ppc_vsr_t *, ppc_vsr_t *, int, int,
+                             int flags, float_status *s);
+
+static void vsxger_muladd32(ppc_vsr_t *at, ppc_vsr_t *a, ppc_vsr_t *b, int i,
+                            int j, int flags, float_status *s)
+{
+    at[i].VsrSF(j) = float32_muladd(a->VsrSF(i), b->VsrSF(j),
+                                    at[i].VsrSF(j), flags, s);
+}
+
+static void vsxger_mul32(ppc_vsr_t *at, ppc_vsr_t *a, ppc_vsr_t *b, int i,
+                         int j, int flags, float_status *s)
+{
+    at[i].VsrSF(j) = float32_mul(a->VsrSF(i), b->VsrSF(j), s);
+}
+
+static void vsxger_zero32(ppc_vsr_t *at, int i, int j)
+{
+    at[i].VsrSF(j) = float32_zero;
+}
+
+static void vsxger_muladd64(ppc_vsr_t *at, ppc_vsr_t *a, ppc_vsr_t *b, int i,
+                            int j, int flags, float_status *s)
+{
+    if (j >= 2) {
+        j -= 2;
+        at[i].VsrDF(j) = float64_muladd(a[i / 2].VsrDF(i % 2), b->VsrDF(j),
+                                        at[i].VsrDF(j), flags, s);
+    }
+}
+
+static void vsxger_mul64(ppc_vsr_t *at, ppc_vsr_t *a, ppc_vsr_t *b, int i,
+                         int j, int flags, float_status *s)
+{
+    if (j >= 2) {
+        j -= 2;
+        at[i].VsrDF(j) = float64_mul(a[i / 2].VsrDF(i % 2), b->VsrDF(j), s);
+    }
+}
+
+static void vsxger_zero64(ppc_vsr_t *at, int i, int j)
+{
+    if (j >= 2) {
+        j -= 2;
+        at[i].VsrDF(j) = float64_zero;
+    }
+}
+
+static void vsxger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t  *at,
+                   uint32_t mask, bool acc, bool neg_mul, bool neg_acc,
+                   vsxger_muladd_f mul, vsxger_muladd_f muladd, vsxger_zero zero)
+{
+    int i, j, xmsk_bit, ymsk_bit, op_flags;
+    uint8_t xmsk = mask & 0x0F;
+    uint8_t ymsk = (mask >> 4) & 0x0F;
+    float_status *excp_ptr = &env->fp_status;
+    op_flags = (neg_acc ^ neg_mul) ? float_muladd_negate_c : 0;
+    op_flags |= (neg_mul) ? float_muladd_negate_result : 0;
+    helper_reset_fpstatus(env);
+    for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {
+        for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {
+            if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {
+                if (acc) {
+                    muladd(at, a, b, i, j, op_flags, excp_ptr);
+                } else {
+                    mul(at, a, b, i, j, op_flags, excp_ptr);
+                }
+            } else {
+                    zero(at, i, j);
+            }
+        }
+    }
+    vsxger_excp(env, GETPC());
+}
+
+QEMU_FLATTEN
+void helper_XVF32GER(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                     ppc_acc_t *at, uint32_t mask)
+{
+    vsxger(env, a, b, at, mask, false, false, false, vsxger_mul32,
+           vsxger_muladd32, vsxger_zero32);
+}
+
+QEMU_FLATTEN
+void helper_XVF32GERPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                       ppc_acc_t *at, uint32_t mask)
+{
+    vsxger(env, a, b, at, mask, true, false, false, vsxger_mul32,
+           vsxger_muladd32, vsxger_zero32);
+}
+
+QEMU_FLATTEN
+void helper_XVF32GERPN(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                       ppc_acc_t *at, uint32_t mask)
+{
+    vsxger(env, a, b, at, mask, true, false, true, vsxger_mul32,
+           vsxger_muladd32, vsxger_zero32);
+}
+
+QEMU_FLATTEN
+void helper_XVF32GERNP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                       ppc_acc_t *at, uint32_t mask)
+{
+    vsxger(env, a, b, at, mask, true, true, false, vsxger_mul32,
+           vsxger_muladd32, vsxger_zero32);
+}
+
+QEMU_FLATTEN
+void helper_XVF32GERNN(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                       ppc_acc_t *at, uint32_t mask)
+{
+    vsxger(env, a, b, at, mask, true, true, true, vsxger_mul32,
+           vsxger_muladd32, vsxger_zero32);
+}
+
+QEMU_FLATTEN
+void helper_XVF64GER(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                     ppc_acc_t *at, uint32_t mask)
+{
+    vsxger(env, a, b, at, mask, false, false, false, vsxger_mul64,
+           vsxger_muladd64, vsxger_zero64);
+}
+
+QEMU_FLATTEN
+void helper_XVF64GERPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                       ppc_acc_t *at, uint32_t mask)
+{
+    vsxger(env, a, b, at, mask, true, false, false, vsxger_mul64,
+           vsxger_muladd64, vsxger_zero64);
+}
+
+QEMU_FLATTEN
+void helper_XVF64GERPN(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                       ppc_acc_t *at, uint32_t mask)
+{
+    vsxger(env, a, b, at, mask, true, false, true, vsxger_mul64,
+           vsxger_muladd64, vsxger_zero64);
+}
+
+QEMU_FLATTEN
+void helper_XVF64GERNP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                       ppc_acc_t *at, uint32_t mask)
+{
+    vsxger(env, a, b, at, mask, true, true, false, vsxger_mul64,
+           vsxger_muladd64, vsxger_zero64);
+}
+
+QEMU_FLATTEN
+void helper_XVF64GERNN(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                       ppc_acc_t *at, uint32_t mask)
+{
+    vsxger(env, a, b, at, mask, true, true, true, vsxger_mul64,
+           vsxger_muladd64, vsxger_zero64);
+}
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 29354276f0..054d25f3b0 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -550,6 +550,16 @@ DEF_HELPER_5(XVI16GER2, void, env, vsr, vsr, acc, i32)
 DEF_HELPER_5(XVI16GER2S, void, env, vsr, vsr, acc, i32)
 DEF_HELPER_5(XVI16GER2PP, void, env, vsr, vsr, acc, i32)
 DEF_HELPER_5(XVI16GER2SPP, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVF32GER, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVF32GERPP, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVF32GERPN, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVF32GERNP, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVF32GERNN, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVF64GER, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVF64GERPP, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVF64GERPN, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVF64GERNP, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVF64GERNN, void, env, vsr, vsr, acc, i32)
 
 DEF_HELPER_2(efscfsi, i32, env, i32)
 DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 899a04bf77..c561a17c7d 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -173,6 +173,7 @@
 # 32 bit GER instructions have all mask bits considered 1
 &MMIRR_XX3      xa xb xt pmsk xmsk ymsk
 %xx_at          23:3
+%xx_xa_pair     2:1 17:4 !function=times_2
 @XX3_at         ...... ... .. ..... ..... ........ ...          &MMIRR_XX3 xt=%xx_at xb=%xx_xb \
                                                                 pmsk=255 xmsk=15 ymsk=15
 
@@ -737,3 +738,15 @@ XVI16GER2PP     111011 ... -- ..... ..... 01101011 ..-  @XX3_at xa=%xx_xa
 XVI8GER4SPP     111011 ... -- ..... ..... 01100011 ..-  @XX3_at xa=%xx_xa
 XVI16GER2S      111011 ... -- ..... ..... 00101011 ..-  @XX3_at xa=%xx_xa
 XVI16GER2SPP    111011 ... -- ..... ..... 00101010 ..-  @XX3_at xa=%xx_xa
+
+XVF32GER        111011 ... -- ..... ..... 00011011 ..-  @XX3_at xa=%xx_xa
+XVF32GERPP      111011 ... -- ..... ..... 00011010 ..-  @XX3_at xa=%xx_xa
+XVF32GERPN      111011 ... -- ..... ..... 10011010 ..-  @XX3_at xa=%xx_xa
+XVF32GERNP      111011 ... -- ..... ..... 01011010 ..-  @XX3_at xa=%xx_xa
+XVF32GERNN      111011 ... -- ..... ..... 11011010 ..-  @XX3_at xa=%xx_xa
+
+XVF64GER        111011 ... -- .... 0 ..... 00111011 ..-  @XX3_at xa=%xx_xa_pair
+XVF64GERPP      111011 ... -- .... 0 ..... 00111010 ..-  @XX3_at xa=%xx_xa_pair
+XVF64GERPN      111011 ... -- .... 0 ..... 10111010 ..-  @XX3_at xa=%xx_xa_pair
+XVF64GERNP      111011 ... -- .... 0 ..... 01111010 ..-  @XX3_at xa=%xx_xa_pair
+XVF64GERNN      111011 ... -- .... 0 ..... 11111010 ..-  @XX3_at xa=%xx_xa_pair
diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc
index c9ed898bb6..76747956bb 100644
--- a/target/ppc/translate/vsx-impl.c.inc
+++ b/target/ppc/translate/vsx-impl.c.inc
@@ -2869,6 +2869,18 @@ TRANS64(PMXVI16GER2PP, do_ger, gen_helper_XVI16GER2PP)
 TRANS64(PMXVI16GER2S, do_ger, gen_helper_XVI16GER2S)
 TRANS64(PMXVI16GER2SPP, do_ger, gen_helper_XVI16GER2SPP)
 
+TRANS(XVF32GER, do_ger, gen_helper_XVF32GER)
+TRANS(XVF32GERPP, do_ger, gen_helper_XVF32GERPP)
+TRANS(XVF32GERPN, do_ger, gen_helper_XVF32GERPN)
+TRANS(XVF32GERNP, do_ger, gen_helper_XVF32GERNP)
+TRANS(XVF32GERNN, do_ger, gen_helper_XVF32GERNN)
+
+TRANS(XVF64GER, do_ger, gen_helper_XVF64GER)
+TRANS(XVF64GERPP, do_ger, gen_helper_XVF64GERPP)
+TRANS(XVF64GERPN, do_ger, gen_helper_XVF64GERPN)
+TRANS(XVF64GERNP, do_ger, gen_helper_XVF64GERNP)
+TRANS(XVF64GERNN, do_ger, gen_helper_XVF64GERNN)
+
 #undef GEN_XX2FORM
 #undef GEN_XX3FORM
 #undef GEN_XX2IFORM
-- 
2.31.1



  parent reply	other threads:[~2022-05-17 18:42 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-17 18:37 [PATCH v3 0/8] VSX MMA Implementation Lucas Mateus Castro(alqotel)
2022-05-17 18:37 ` [PATCH v3 1/8] target/ppc: Implement xxm[tf]acc and xxsetaccz Lucas Mateus Castro(alqotel)
2022-05-17 18:37 ` [PATCH v3 2/8] target/ppc: Implemented xvi*ger* instructions Lucas Mateus Castro(alqotel)
2022-05-17 18:37 ` [PATCH v3 3/8] target/ppc: Implemented pmxvi*ger* instructions Lucas Mateus Castro(alqotel)
2022-05-17 19:17   ` Richard Henderson
2022-05-17 18:37 ` Lucas Mateus Castro(alqotel) [this message]
2022-05-17 19:22   ` [PATCH v3 4/8] target/ppc: Implemented xvf*ger* Richard Henderson
2022-05-17 18:37 ` [PATCH v3 5/8] target/ppc: Implemented xvf16ger* Lucas Mateus Castro(alqotel)
2022-05-17 19:40   ` Richard Henderson
2022-05-17 18:37 ` [PATCH v3 6/8] target/ppc: Implemented pmxvf*ger* Lucas Mateus Castro(alqotel)
2022-05-17 18:37 ` [PATCH v3 7/8] target/ppc: Implemented [pm]xvbf16ger2* Lucas Mateus Castro(alqotel)
2022-05-17 18:37 ` [PATCH v3 8/8] linux-user: Add PowerPC ISA 3.1 and MMA to hwcap Lucas Mateus Castro(alqotel)
2022-05-17 19:41   ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220517183735.110342-5-lucas.araujo@eldorado.org.br \
    --to=lucas.araujo@eldorado.org.br \
    --cc=clg@kaod.org \
    --cc=danielhb413@gmail.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=groug@kaod.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.