All of lore.kernel.org
 help / color / mirror / Atom feed
From: Daniel Henrique Barboza <danielhb413@gmail.com>
To: qemu-devel@nongnu.org
Cc: qemu-ppc@nongnu.org, danielhb413@gmail.com,
	peter.maydell@linaro.org, richard.henderson@linaro.org,
	"Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
Subject: [PULL 31/34] target/ppc: Implemented xvf16ger*
Date: Thu, 26 May 2022 18:38:12 -0300	[thread overview]
Message-ID: <20220526213815.92701-32-danielhb413@gmail.com> (raw)
In-Reply-To: <20220526213815.92701-1-danielhb413@gmail.com>

From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>

Implement the following PowerISA v3.1 instructions:
xvf16ger2:   VSX Vector 16-bit Floating-Point GER (rank-2 update)
xvf16ger2nn: VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative
multiply, Negative accumulate
xvf16ger2np: VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative
multiply, Positive accumulate
xvf16ger2pn: VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive
multiply, Negative accumulate
xvf16ger2pp: VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive
multiply, Positive accumulate

Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220524140537.27451-6-lucas.araujo@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
---
 target/ppc/cpu.h                    |  3 +
 target/ppc/fpu_helper.c             | 95 +++++++++++++++++++++++++++++
 target/ppc/helper.h                 |  5 ++
 target/ppc/insn32.decode            |  6 ++
 target/ppc/translate/vsx-impl.c.inc |  6 ++
 5 files changed, 115 insertions(+)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 40c779f246..6d78078f37 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -227,6 +227,7 @@ typedef union _ppc_vsr_t {
     int16_t s16[8];
     int32_t s32[4];
     int64_t s64[2];
+    float16 f16[8];
     float32 f32[4];
     float64 f64[2];
     float128 f128;
@@ -2643,6 +2644,7 @@ static inline bool lsw_reg_in_range(int start, int nregs, int rx)
 #define VsrSW(i) s32[i]
 #define VsrD(i) u64[i]
 #define VsrSD(i) s64[i]
+#define VsrHF(i) f16[i]
 #define VsrSF(i) f32[i]
 #define VsrDF(i) f64[i]
 #else
@@ -2654,6 +2656,7 @@ static inline bool lsw_reg_in_range(int start, int nregs, int rx)
 #define VsrSW(i) s32[3 - (i)]
 #define VsrD(i) u64[1 - (i)]
 #define VsrSD(i) s64[1 - (i)]
+#define VsrHF(i) f16[7 - (i)]
 #define VsrSF(i) f32[3 - (i)]
 #define VsrDF(i) f64[1 - (i)]
 #endif
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 712c71162c..a9b2ef370f 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -36,6 +36,15 @@ static inline float128 float128_snan_to_qnan(float128 x)
 #define float32_snan_to_qnan(x) ((x) | 0x00400000)
 #define float16_snan_to_qnan(x) ((x) | 0x0200)
 
+static inline float32 bfp32_neg(float32 a)
+{
+    if (unlikely(float32_is_any_nan(a))) {
+        return a;
+    } else {
+        return float32_chs(a);
+    }
+}
+
 static inline bool fp_exceptions_enabled(CPUPPCState *env)
 {
 #ifdef CONFIG_USER_ONLY
@@ -3501,6 +3510,57 @@ static inline void vsxger_excp(CPUPPCState *env, uintptr_t retaddr)
     do_fpscr_check_status(env, retaddr);
 }
 
+typedef float64 extract_f16(float16, float_status *);
+
+static float64 extract_hf16(float16 in, float_status *fp_status)
+{
+    return float16_to_float64(in, true, fp_status);
+}
+
+static void vsxger16(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                     ppc_acc_t  *at, uint32_t mask, bool acc,
+                     bool neg_mul, bool neg_acc, extract_f16 extract)
+{
+    float32 r, aux_acc;
+    float64 psum, va, vb, vc, vd;
+    int i, j, xmsk_bit, ymsk_bit;
+    uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK),
+            xmsk = FIELD_EX32(mask, GER_MSK, XMSK),
+            ymsk = FIELD_EX32(mask, GER_MSK, YMSK);
+    float_status *excp_ptr = &env->fp_status;
+    for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {
+        for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {
+            if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {
+                va = !(pmsk & 2) ? float64_zero :
+                                   extract(a->VsrHF(2 * i), excp_ptr);
+                vb = !(pmsk & 2) ? float64_zero :
+                                   extract(b->VsrHF(2 * j), excp_ptr);
+                vc = !(pmsk & 1) ? float64_zero :
+                                   extract(a->VsrHF(2 * i + 1), excp_ptr);
+                vd = !(pmsk & 1) ? float64_zero :
+                                   extract(b->VsrHF(2 * j + 1), excp_ptr);
+                psum = float64_mul(va, vb, excp_ptr);
+                psum = float64r32_muladd(vc, vd, psum, 0, excp_ptr);
+                r = float64_to_float32(psum, excp_ptr);
+                if (acc) {
+                    aux_acc = at[i].VsrSF(j);
+                    if (neg_mul) {
+                        r = bfp32_neg(r);
+                    }
+                    if (neg_acc) {
+                        aux_acc = bfp32_neg(aux_acc);
+                    }
+                    r = float32_add(r, aux_acc, excp_ptr);
+                }
+                at[i].VsrSF(j) = r;
+            } else {
+                at[i].VsrSF(j) = float32_zero;
+            }
+        }
+    }
+    vsxger_excp(env, GETPC());
+}
+
 typedef void vsxger_zero(ppc_vsr_t *at, int, int);
 
 typedef void vsxger_muladd_f(ppc_vsr_t *, ppc_vsr_t *, ppc_vsr_t *, int, int,
@@ -3579,6 +3639,41 @@ static void vsxger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
     vsxger_excp(env, GETPC());
 }
 
+QEMU_FLATTEN
+void helper_XVF16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                     ppc_acc_t *at, uint32_t mask)
+{
+    vsxger16(env, a, b, at, mask, false, false, false, extract_hf16);
+}
+
+QEMU_FLATTEN
+void helper_XVF16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                        ppc_acc_t *at, uint32_t mask)
+{
+    vsxger16(env, a, b, at, mask, true, false, false, extract_hf16);
+}
+
+QEMU_FLATTEN
+void helper_XVF16GER2PN(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                        ppc_acc_t *at, uint32_t mask)
+{
+    vsxger16(env, a, b, at, mask, true, false, true, extract_hf16);
+}
+
+QEMU_FLATTEN
+void helper_XVF16GER2NP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                        ppc_acc_t *at, uint32_t mask)
+{
+    vsxger16(env, a, b, at, mask, true, true, false, extract_hf16);
+}
+
+QEMU_FLATTEN
+void helper_XVF16GER2NN(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+                        ppc_acc_t *at, uint32_t mask)
+{
+    vsxger16(env, a, b, at, mask, true, true, true, extract_hf16);
+}
+
 QEMU_FLATTEN
 void helper_XVF32GER(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                      ppc_acc_t *at, uint32_t mask)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index f38cdbe1d8..4070c0891c 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -550,6 +550,11 @@ DEF_HELPER_5(XVI16GER2, void, env, vsr, vsr, acc, i32)
 DEF_HELPER_5(XVI16GER2S, void, env, vsr, vsr, acc, i32)
 DEF_HELPER_5(XVI16GER2PP, void, env, vsr, vsr, acc, i32)
 DEF_HELPER_5(XVI16GER2SPP, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVF16GER2, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVF16GER2PP, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVF16GER2PN, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVF16GER2NP, void, env, vsr, vsr, acc, i32)
+DEF_HELPER_5(XVF16GER2NN, void, env, vsr, vsr, acc, i32)
 DEF_HELPER_5(XVF32GER, void, env, vsr, vsr, acc, i32)
 DEF_HELPER_5(XVF32GERPP, void, env, vsr, vsr, acc, i32)
 DEF_HELPER_5(XVF32GERPN, void, env, vsr, vsr, acc, i32)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 6b644155ec..b8e317159c 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -763,6 +763,12 @@ XVI8GER4SPP     111011 ... -- ..... ..... 01100011 ..-  @XX3_at xa=%xx_xa
 XVI16GER2S      111011 ... -- ..... ..... 00101011 ..-  @XX3_at xa=%xx_xa
 XVI16GER2SPP    111011 ... -- ..... ..... 00101010 ..-  @XX3_at xa=%xx_xa
 
+XVF16GER2       111011 ... -- ..... ..... 00010011 ..-  @XX3_at xa=%xx_xa
+XVF16GER2PP     111011 ... -- ..... ..... 00010010 ..-  @XX3_at xa=%xx_xa
+XVF16GER2PN     111011 ... -- ..... ..... 10010010 ..-  @XX3_at xa=%xx_xa
+XVF16GER2NP     111011 ... -- ..... ..... 01010010 ..-  @XX3_at xa=%xx_xa
+XVF16GER2NN     111011 ... -- ..... ..... 11010010 ..-  @XX3_at xa=%xx_xa
+
 XVF32GER        111011 ... -- ..... ..... 00011011 ..-  @XX3_at xa=%xx_xa
 XVF32GERPP      111011 ... -- ..... ..... 00011010 ..-  @XX3_at xa=%xx_xa
 XVF32GERPN      111011 ... -- ..... ..... 10011010 ..-  @XX3_at xa=%xx_xa
diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc
index cc754ab175..01978a585a 100644
--- a/target/ppc/translate/vsx-impl.c.inc
+++ b/target/ppc/translate/vsx-impl.c.inc
@@ -2898,6 +2898,12 @@ TRANS64(PMXVI16GER2PP, do_ger, gen_helper_XVI16GER2PP)
 TRANS64(PMXVI16GER2S, do_ger, gen_helper_XVI16GER2S)
 TRANS64(PMXVI16GER2SPP, do_ger, gen_helper_XVI16GER2SPP)
 
+TRANS(XVF16GER2, do_ger, gen_helper_XVF16GER2)
+TRANS(XVF16GER2PP, do_ger, gen_helper_XVF16GER2PP)
+TRANS(XVF16GER2PN, do_ger, gen_helper_XVF16GER2PN)
+TRANS(XVF16GER2NP, do_ger, gen_helper_XVF16GER2NP)
+TRANS(XVF16GER2NN, do_ger, gen_helper_XVF16GER2NN)
+
 TRANS(XVF32GER, do_ger, gen_helper_XVF32GER)
 TRANS(XVF32GERPP, do_ger, gen_helper_XVF32GERPP)
 TRANS(XVF32GERPN, do_ger, gen_helper_XVF32GERPN)
-- 
2.36.1



  parent reply	other threads:[~2022-05-26 22:18 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-26 21:37 [PULL 00/34] ppc queue Daniel Henrique Barboza
2022-05-26 21:37 ` [PULL 01/34] pseries: allow setting stdout-path even on machines with a VGA Daniel Henrique Barboza
2022-05-26 21:37 ` [PULL 02/34] hw/ppc/e500: Remove unused BINARY_DEVICE_TREE_FILE Daniel Henrique Barboza
2022-05-26 21:37 ` [PULL 03/34] spapr: Use address from elf parser for kernel address Daniel Henrique Barboza
2022-05-26 21:37 ` [PULL 04/34] spapr/docs: Add a few words about x-vof Daniel Henrique Barboza
2022-05-26 21:37 ` [PULL 05/34] mos6522: fix linking error when CONFIG_MOS6522 is not set Daniel Henrique Barboza
2022-05-26 21:37 ` [PULL 06/34] target/ppc: Fix tlbie Daniel Henrique Barboza
2022-05-26 21:37 ` [PULL 07/34] target/ppc: Fix FPSCR.FI bit being cleared when it shouldn't Daniel Henrique Barboza
2022-05-26 21:37 ` [PULL 08/34] target/ppc: Fix FPSCR.FI changing in float_overflow_excp() Daniel Henrique Barboza
2022-05-26 21:37 ` [PULL 09/34] target/ppc: Rename sfprf to sfifprf where it's also used as set fi flag Daniel Henrique Barboza
2022-05-26 21:37 ` [PULL 10/34] pnv/xive2: Don't overwrite PC registers when writing TCTXT registers Daniel Henrique Barboza
2022-05-26 21:37 ` [PULL 11/34] target/ppc: declare darn32/darn64 helpers with TCG_CALL_NO_RWG Daniel Henrique Barboza
2022-05-26 21:37 ` [PULL 12/34] target/ppc: use TCG_CALL_NO_RWG in vector helpers without env Daniel Henrique Barboza
2022-05-26 21:37 ` [PULL 13/34] target/ppc: use TCG_CALL_NO_RWG in BCD helpers Daniel Henrique Barboza
2022-05-26 21:37 ` [PULL 14/34] target/ppc: use TCG_CALL_NO_RWG in VSX helpers without env Daniel Henrique Barboza
2022-05-26 21:37 ` [PULL 15/34] target/ppc: Use TCG_CALL_NO_RWG_SE in fsel helper Daniel Henrique Barboza
2022-05-26 21:37 ` [PULL 16/34] target/ppc: declare xscvspdpn helper with call flags Daniel Henrique Barboza
2022-05-26 21:37 ` [PULL 17/34] target/ppc: declare xvxsigsp " Daniel Henrique Barboza
2022-05-26 21:37 ` [PULL 18/34] target/ppc: declare xxextractuw and xxinsertw helpers " Daniel Henrique Barboza
2022-05-26 21:38 ` [PULL 19/34] target/ppc: introduce do_va_helper Daniel Henrique Barboza
2022-05-26 21:38 ` [PULL 20/34] target/ppc: declare vmsum[um]bm helpers with call flags Daniel Henrique Barboza
2022-05-26 21:38 ` [PULL 21/34] target/ppc: declare vmsumuh[ms] helper " Daniel Henrique Barboza
2022-05-26 21:38 ` [PULL 22/34] target/ppc: declare vmsumsh[ms] " Daniel Henrique Barboza
2022-05-26 21:38 ` [PULL 23/34] target/ppc: Fix eieio memory ordering semantics Daniel Henrique Barboza
2022-05-26 21:38 ` [PULL 24/34] tcg/ppc: ST_ST memory ordering is not provided with eieio Daniel Henrique Barboza
2022-05-26 21:38 ` [PULL 25/34] tcg/ppc: Optimize memory ordering generation with lwsync Daniel Henrique Barboza
2022-05-26 21:38 ` [PULL 26/34] target/ppc: Implement lwsync with weaker memory ordering Daniel Henrique Barboza
2022-05-26 21:38 ` [PULL 27/34] target/ppc: Implement xxm[tf]acc and xxsetaccz Daniel Henrique Barboza
2022-05-26 21:38 ` [PULL 28/34] target/ppc: Implemented xvi*ger* instructions Daniel Henrique Barboza
2022-05-26 21:38 ` [PULL 29/34] target/ppc: Implemented pmxvi*ger* instructions Daniel Henrique Barboza
2022-05-26 21:38 ` [PULL 30/34] target/ppc: Implemented xvf*ger* Daniel Henrique Barboza
2022-05-26 21:38 ` Daniel Henrique Barboza [this message]
2022-05-26 21:38 ` [PULL 32/34] target/ppc: Implemented pmxvf*ger* Daniel Henrique Barboza
2022-05-26 21:38 ` [PULL 33/34] target/ppc: Implemented [pm]xvbf16ger2* Daniel Henrique Barboza
2022-05-26 21:38 ` [PULL 34/34] linux-user: Add PowerPC ISA 3.1 and MMA to hwcap Daniel Henrique Barboza
2022-05-27 15:19 ` [PULL 00/34] ppc queue Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220526213815.92701-32-danielhb413@gmail.com \
    --to=danielhb413@gmail.com \
    --cc=lucas.araujo@eldorado.org.br \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.