All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] target/mips: Add MAC2008 support
@ 2020-03-28  9:08 Jiaxun Yang
  2020-03-28 19:09 ` Richard Henderson
  0 siblings, 1 reply; 3+ messages in thread
From: Jiaxun Yang @ 2020-03-28  9:08 UTC (permalink / raw)
  To: qemu-devel
  Cc: chenhc, aleksandar.qemu.devel, aleksandar.rikalo, aurelien, Jiaxun Yang

MAC2008 was introduced in MIPS Release 3 but removed in MIPS Release 5.
However, some processors implement this feature: some Ingenic MCUs
can configure the MAC2008 status at runtime, while the whole
Loongson-64 family is MAC2008 only.

The FCSR.MAC2008 bit indicates that the FMA family of instructions on
these processors has fused behavior, similar to FMA in Release 6,
so we can reuse the helpers for them.

Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
---
 target/mips/cpu.h        |  1 +
 target/mips/fpu_helper.c | 61 +++++++++++++++++++++------------
 target/mips/helper.h     | 12 +++----
 target/mips/translate.c  | 74 +++++++++++++++++++++++++++++++++-------
 4 files changed, 107 insertions(+), 41 deletions(-)

diff --git a/target/mips/cpu.h b/target/mips/cpu.h
index 94d01ea798..b20e6e3387 100644
--- a/target/mips/cpu.h
+++ b/target/mips/cpu.h
@@ -63,6 +63,7 @@ struct CPUMIPSFPUContext {
     uint32_t fcr31_rw_bitmask;
     uint32_t fcr31;
 #define FCR31_FS 24
+#define FCR31_MAC2008 20
 #define FCR31_ABS2008 19
 #define FCR31_NAN2008 18
 #define SET_FP_COND(num, env)     do { ((env).fcr31) |=                 \
diff --git a/target/mips/fpu_helper.c b/target/mips/fpu_helper.c
index 5287c86c61..2e50d50f36 100644
--- a/target/mips/fpu_helper.c
+++ b/target/mips/fpu_helper.c
@@ -1357,7 +1357,7 @@ FLOAT_MINMAX(mina_d, 64, minnummag)
     }                                                                \
 }
 
-/* FMA based operations */
+/* FMA based operations (both unfused and fused) */
 #define FLOAT_FMA(name, type)                                        \
 uint64_t helper_float_ ## name ## _d(CPUMIPSState *env,              \
                                      uint64_t fdt0, uint64_t fdt1,   \
@@ -1392,33 +1392,52 @@ uint64_t helper_float_ ## name ## _ps(CPUMIPSState *env,             \
     UNFUSED_FMA(float32, fsth0, fsth1, fsth2, type);                 \
     update_fcr31(env, GETPC());                                      \
     return ((uint64_t)fsth0 << 32) | fst0;                           \
+}                                                                    \
+uint64_t helper_float_ ## name ## f_d(CPUMIPSState *env,             \
+                                     uint64_t fdt0, uint64_t fdt1,   \
+                                     uint64_t fdt2)                  \
+{                                                                    \
+    fdt0 = float64_muladd(fdt0, fdt1, fdt2, type,                    \
+                            &env->active_fpu.fp_status);             \
+    update_fcr31(env, GETPC());                                      \
+    return fdt0;                                                     \
+}                                                                    \
+                                                                     \
+uint32_t helper_float_ ## name ## f_s(CPUMIPSState *env,             \
+                                     uint32_t fst0, uint32_t fst1,   \
+                                     uint32_t fst2)                  \
+{                                                                    \
+    fst0 = float32_muladd(fst0, fst1, fst2, type,                    \
+                            &env->active_fpu.fp_status);             \
+    update_fcr31(env, GETPC());                                      \
+    return fst0;                                                     \
+}                                                                    \
+                                                                     \
+uint64_t helper_float_ ## name ## f_ps(CPUMIPSState *env,            \
+                                      uint64_t fdt0, uint64_t fdt1,  \
+                                      uint64_t fdt2)                 \
+{                                                                    \
+    uint32_t fst0 = fdt0 & 0XFFFFFFFF;                               \
+    uint32_t fsth0 = fdt0 >> 32;                                     \
+    uint32_t fst1 = fdt1 & 0XFFFFFFFF;                               \
+    uint32_t fsth1 = fdt1 >> 32;                                     \
+    uint32_t fst2 = fdt2 & 0XFFFFFFFF;                               \
+    uint32_t fsth2 = fdt2 >> 32;                                     \
+                                                                     \
+    fst0 = float32_muladd(fst0, fst1, fst2, type,                    \
+                            &env->active_fpu.fp_status);             \
+    fsth0 = float32_muladd(fsth0, fsth1, fsth2, type,                \
+                            &env->active_fpu.fp_status);             \
+    update_fcr31(env, GETPC());                                      \
+    return ((uint64_t)fsth0 << 32) | fst0;                           \
 }
+
 FLOAT_FMA(madd, 0)
 FLOAT_FMA(msub, float_muladd_negate_c)
 FLOAT_FMA(nmadd, float_muladd_negate_result)
 FLOAT_FMA(nmsub, float_muladd_negate_result | float_muladd_negate_c)
 #undef FLOAT_FMA
 
-#define FLOAT_FMADDSUB(name, bits, muladd_arg)                          \
-uint ## bits ## _t helper_float_ ## name(CPUMIPSState *env,             \
-                                         uint ## bits ## _t fs,         \
-                                         uint ## bits ## _t ft,         \
-                                         uint ## bits ## _t fd)         \
-{                                                                       \
-    uint ## bits ## _t fdret;                                           \
-                                                                        \
-    fdret = float ## bits ## _muladd(fs, ft, fd, muladd_arg,            \
-                                     &env->active_fpu.fp_status);       \
-    update_fcr31(env, GETPC());                                         \
-    return fdret;                                                       \
-}
-
-FLOAT_FMADDSUB(maddf_s, 32, 0)
-FLOAT_FMADDSUB(maddf_d, 64, 0)
-FLOAT_FMADDSUB(msubf_s, 32, float_muladd_negate_product)
-FLOAT_FMADDSUB(msubf_d, 64, float_muladd_negate_product)
-#undef FLOAT_FMADDSUB
-
 /* compare operations */
 #define FOP_COND_D(op, cond)                                   \
 void helper_cmp_d_ ## op(CPUMIPSState *env, uint64_t fdt0,     \
diff --git a/target/mips/helper.h b/target/mips/helper.h
index 84fdd9fd27..56aad63931 100644
--- a/target/mips/helper.h
+++ b/target/mips/helper.h
@@ -232,13 +232,6 @@ DEF_HELPER_3(float_mulr_ps, i64, env, i64, i64)
 DEF_HELPER_FLAGS_2(float_class_s, TCG_CALL_NO_RWG_SE, i32, env, i32)
 DEF_HELPER_FLAGS_2(float_class_d, TCG_CALL_NO_RWG_SE, i64, env, i64)
 
-#define FOP_PROTO(op)                                     \
-DEF_HELPER_4(float_ ## op ## _s, i32, env, i32, i32, i32) \
-DEF_HELPER_4(float_ ## op ## _d, i64, env, i64, i64, i64)
-FOP_PROTO(maddf)
-FOP_PROTO(msubf)
-#undef FOP_PROTO
-
 #define FOP_PROTO(op)                                \
 DEF_HELPER_3(float_ ## op ## _s, i32, env, i32, i32) \
 DEF_HELPER_3(float_ ## op ## _d, i64, env, i64, i64)
@@ -305,7 +298,10 @@ FOP_PROTO(rsqrt2)
 #define FOP_PROTO(op)                                      \
 DEF_HELPER_4(float_ ## op ## _s, i32, env, i32, i32, i32)  \
 DEF_HELPER_4(float_ ## op ## _d, i64, env, i64, i64, i64)  \
-DEF_HELPER_4(float_ ## op ## _ps, i64, env, i64, i64, i64)
+DEF_HELPER_4(float_ ## op ## _ps, i64, env, i64, i64, i64) \
+DEF_HELPER_4(float_ ## op ## f_s, i32, env, i32, i32, i32)  \
+DEF_HELPER_4(float_ ## op ## f_d, i64, env, i64, i64, i64)  \
+DEF_HELPER_4(float_ ## op ## f_ps, i64, env, i64, i64, i64)
 FOP_PROTO(madd)
 FOP_PROTO(msub)
 FOP_PROTO(nmadd)
diff --git a/target/mips/translate.c b/target/mips/translate.c
index d745bd2803..3ce159df97 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -2547,6 +2547,7 @@ typedef struct DisasContext {
     bool mrp;
     bool nan2008;
     bool abs2008;
+    bool mac2008;
     bool saar;
     bool mi;
     int gi;
@@ -12776,7 +12777,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr32(ctx, fp0, fs);
             gen_load_fpr32(ctx, fp1, ft);
             gen_load_fpr32(ctx, fp2, fr);
-            gen_helper_float_madd_s(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_madd_s(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_maddf_s(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(ctx, fp2, fd);
@@ -12794,7 +12799,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_madd_d(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_madd_d(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_maddf_d(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -12811,7 +12820,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_madd_ps(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_madd_ps(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_maddf_ps(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -12828,7 +12841,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr32(ctx, fp0, fs);
             gen_load_fpr32(ctx, fp1, ft);
             gen_load_fpr32(ctx, fp2, fr);
-            gen_helper_float_msub_s(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_msub_s(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_msubf_s(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(ctx, fp2, fd);
@@ -12846,7 +12863,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_msub_d(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_msub_d(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_msubf_d(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -12863,7 +12884,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_msub_ps(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_msub_ps(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_msubf_ps(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -12880,7 +12905,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr32(ctx, fp0, fs);
             gen_load_fpr32(ctx, fp1, ft);
             gen_load_fpr32(ctx, fp2, fr);
-            gen_helper_float_nmadd_s(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_nmadd_s(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_nmaddf_s(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(ctx, fp2, fd);
@@ -12898,7 +12927,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmadd_d(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_nmadd_d(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_nmaddf_d(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -12915,7 +12948,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmadd_ps(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_nmadd_ps(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_nmaddf_ps(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -12932,7 +12969,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr32(ctx, fp0, fs);
             gen_load_fpr32(ctx, fp1, ft);
             gen_load_fpr32(ctx, fp2, fr);
-            gen_helper_float_nmsub_s(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_nmsub_s(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_nmsubf_s(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(ctx, fp2, fd);
@@ -12950,7 +12991,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmsub_d(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_nmsub_d(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_nmsubf_d(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -12967,7 +13012,11 @@ static void gen_flt3_arith(DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmsub_ps(fp2, cpu_env, fp0, fp1, fp2);
+            if (ctx->mac2008) {
+                gen_helper_float_nmsub_ps(fp2, cpu_env, fp0, fp1, fp2);
+            } else {
+                gen_helper_float_nmsubf_ps(fp2, cpu_env, fp0, fp1, fp2);
+            }
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -30807,6 +30856,7 @@ static void mips_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
     ctx->mrp = (env->CP0_Config5 >> CP0C5_MRP) & 1;
     ctx->nan2008 = (env->active_fpu.fcr31 >> FCR31_NAN2008) & 1;
     ctx->abs2008 = (env->active_fpu.fcr31 >> FCR31_ABS2008) & 1;
+    ctx->mac2008 = (env->active_fpu.fcr31 >> FCR31_MAC2008) & 1;
     ctx->mi = (env->CP0_Config5 >> CP0C5_MI) & 1;
     ctx->gi = (env->CP0_Config5 >> CP0C5_GI) & 3;
     restore_cpu_state(env, ctx);
-- 
2.26.0.rc2




^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] target/mips: Add MAC2008 support
  2020-03-28  9:08 [PATCH] target/mips: Add MAC2008 support Jiaxun Yang
@ 2020-03-28 19:09 ` Richard Henderson
  2020-03-29  0:05   ` Jiaxun Yang
  0 siblings, 1 reply; 3+ messages in thread
From: Richard Henderson @ 2020-03-28 19:09 UTC (permalink / raw)
  To: Jiaxun Yang, qemu-devel
  Cc: chenhc, aleksandar.qemu.devel, aleksandar.rikalo, aurelien

On 3/28/20 2:08 AM, Jiaxun Yang wrote:
> -            gen_helper_float_madd_s(fp2, cpu_env, fp0, fp1, fp2);
> +            if (ctx->mac2008) {
> +                gen_helper_float_madd_s(fp2, cpu_env, fp0, fp1, fp2);
> +            } else {
> +                gen_helper_float_maddf_s(fp2, cpu_env, fp0, fp1, fp2);
> +            }
>  

Surely this test is backward, that mac2008 invokes maddf.


r~


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] target/mips: Add MAC2008 support
  2020-03-28 19:09 ` Richard Henderson
@ 2020-03-29  0:05   ` Jiaxun Yang
  0 siblings, 0 replies; 3+ messages in thread
From: Jiaxun Yang @ 2020-03-29  0:05 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel
  Cc: chenhc, aleksandar.qemu.devel, aleksandar.rikalo, aurelien



于 2020年3月29日 GMT+08:00 上午3:09:16, Richard Henderson <richard.henderson@linaro.org> 写到:
>On 3/28/20 2:08 AM, Jiaxun Yang wrote:
>> -            gen_helper_float_madd_s(fp2, cpu_env, fp0, fp1, fp2);
>> +            if (ctx->mac2008) {
>> +                gen_helper_float_madd_s(fp2, cpu_env, fp0, fp1,
>fp2);
>> +            } else {
>> +                gen_helper_float_maddf_s(fp2, cpu_env, fp0, fp1,
>fp2);
>> +            }
>>  
>
>Surely this test is backward, that mac2008 invokes maddf.

Sorry, that was my mistake.
Will fix in v2.

>
>
>r~

-- 
Jiaxun Yang


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-03-29  0:07 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-28  9:08 [PATCH] target/mips: Add MAC2008 support Jiaxun Yang
2020-03-28 19:09 ` Richard Henderson
2020-03-29  0:05   ` Jiaxun Yang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.