[PATCH 0/3] RFC: target/riscv: add half-precision floating-point extension

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 0/3] RFC: target/riscv: add half-precision floating-point extension
@ 2020-08-11  8:59 Chih-Min Chao
  2020-08-11  8:59 ` [PATCH 1/3] target/riscv: add NaN-Boxing helper for half-float Chih-Min Chao
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Chih-Min Chao @ 2020-08-11  8:59 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv; +Cc: Chih-Min Chao

The spec is a draft but required by vector extension.  The reference is 
availabe at
   https://github.com/riscv/riscv-isa-manual/tree/zfh

The patch depends two unmerged patch set
  1. extend softfloat to support int8 and alternative NaN probagapation
  2. NaNBox fix

Based-on: <1596102747-20226-1-git-send-email-chihmin.chao@sifive.com>
Based-on: <20200724002807.441147-1-richard.henderson@linaro.org>

Chih-Min Chao (2):
  target/riscv: add NaN-Boxing helper for half-float
  target/riscv: support 'x-k' in cpu option

Kito Cheng (1):
  target/riscv: Implement zfh extension

 target/riscv/cpu.c                        |   4 +
 target/riscv/cpu.h                        |   2 +
 target/riscv/fpu_helper.c                 | 180 ++++++++++
 target/riscv/helper.h                     |  34 ++
 target/riscv/insn32-64.decode             |   6 +
 target/riscv/insn32.decode                |  32 ++
 target/riscv/insn_trans/trans_rvzfh.inc.c | 531 ++++++++++++++++++++++++++++++
 target/riscv/internals.h                  |  16 +
 target/riscv/translate.c                  |  16 +
 9 files changed, 821 insertions(+)
 create mode 100644 target/riscv/insn_trans/trans_rvzfh.inc.c

-- 
2.7.4



^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/3] target/riscv: add NaN-Boxing helper for half-float
  2020-08-11  8:59 [PATCH 0/3] RFC: target/riscv: add half-precision floating-point extension Chih-Min Chao
@ 2020-08-11  8:59 ` Chih-Min Chao
  2020-08-11  8:59 ` [PATCH 2/3] target/riscv: Implement zfh extension Chih-Min Chao
  2020-08-11  8:59 ` [PATCH 3/3] target/riscv: support 'x-k' in cpu option Chih-Min Chao
  2 siblings, 0 replies; 5+ messages in thread
From: Chih-Min Chao @ 2020-08-11  8:59 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: Chih-Min Chao, Alistair Francis, Palmer Dabbelt,
	Sagar Karandikar, Bastian Koppelmann

When writing, box the 16bit value with all ones in high part[63:16]
When reading, unbox the 16bit value from 64bit storage and validate it

Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
---
 target/riscv/internals.h | 16 ++++++++++++++++
 target/riscv/translate.c | 15 +++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index f1a546d..522c306 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -54,4 +54,20 @@ static inline float32 check_nanbox_s(uint64_t f)
     }
 }
 
+static inline uint64_t nanbox_h(float16 f)
+{
+    return f | MAKE_64BIT_MASK(16, 48);
+}
+
+static inline float16 check_nanbox_h(uint64_t f)
+{
+    uint64_t mask = MAKE_64BIT_MASK(16, 48);
+
+    if (likely((f & mask) == mask)) {
+        return (uint16_t)f;
+    } else {
+        return 0x7E00u; /* default qnan */
+    }
+}
+
 #endif
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index bf35182..e227534 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -119,6 +119,21 @@ static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in)
     tcg_temp_free_i64(t_nan);
 }
 
+static void gen_nanbox_h(TCGv_i64 out, TCGv_i64 in)
+{
+    tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(16, 48));
+}
+
+static void gen_check_nanbox_h(TCGv_i64 out, TCGv_i64 in)
+{
+    TCGv_i64 t_max = tcg_const_i64(0xffffffffffff0000ull);
+    TCGv_i64 t_nan = tcg_const_i64(0xffffffffffff7e00ull);
+
+    tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan);
+    tcg_temp_free_i64(t_max);
+    tcg_temp_free_i64(t_nan);
+}
+
 static void generate_exception(DisasContext *ctx, int excp)
 {
     tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
-- 
2.7.4



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/3] target/riscv: Implement zfh extension
  2020-08-11  8:59 [PATCH 0/3] RFC: target/riscv: add half-precision floating-point extension Chih-Min Chao
  2020-08-11  8:59 ` [PATCH 1/3] target/riscv: add NaN-Boxing helper for half-float Chih-Min Chao
@ 2020-08-11  8:59 ` Chih-Min Chao
  2020-08-25 19:00   ` Alistair Francis
  2020-08-11  8:59 ` [PATCH 3/3] target/riscv: support 'x-k' in cpu option Chih-Min Chao
  2 siblings, 1 reply; 5+ messages in thread
From: Chih-Min Chao @ 2020-08-11  8:59 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: Sagar Karandikar, Bastian Koppelmann, Chih-Min Chao,
	Palmer Dabbelt, Alistair Francis, Kito Cheng

From: Kito Cheng <kito.cheng@sifive.com>

All of instructions listed in spec has been implemented. The spec is still
under discussion and not included master branch.

ref:
  https://github.com/riscv/riscv-isa-manual/tree/zfh

Signed-off-by: Kito Cheng <kito.cheng@sifive.com>
Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
---
 target/riscv/cpu.h                        |   1 +
 target/riscv/fpu_helper.c                 | 180 ++++++++++
 target/riscv/helper.h                     |  34 ++
 target/riscv/insn32-64.decode             |   6 +
 target/riscv/insn32.decode                |  32 ++
 target/riscv/insn_trans/trans_rvzfh.inc.c | 531 ++++++++++++++++++++++++++++++
 target/riscv/translate.c                  |   1 +
 7 files changed, 785 insertions(+)
 create mode 100644 target/riscv/insn_trans/trans_rvzfh.inc.c

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index a804a5d..6a077c1 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -65,6 +65,7 @@
 #define RVS RV('S')
 #define RVU RV('U')
 #define RVH RV('H')
+#define RVZfh RV('K')
 
 /* S extension denotes that Supervisor mode exists, however it is possible
    to have a core that support S mode but does not have an MMU and there
diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c
index 2c52ff1..5b1b210 100644
--- a/target/riscv/fpu_helper.c
+++ b/target/riscv/fpu_helper.c
@@ -81,6 +81,15 @@ void helper_set_rounding_mode(CPURISCVState *env, uint32_t rm)
     set_float_rounding_mode(softrm, &env->fp_status);
 }
 
+static uint64_t do_fmadd_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2,
+                           uint64_t rs3, int flags)
+{
+    float16 frs1 = check_nanbox_h(rs1);
+    float16 frs2 = check_nanbox_h(rs2);
+    float16 frs3 = check_nanbox_h(rs3);
+    return nanbox_h(float16_muladd(frs1, frs2, frs3, flags, &env->fp_status));
+}
+
 static uint64_t do_fmadd_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2,
                            uint64_t rs3, int flags)
 {
@@ -102,6 +111,12 @@ uint64_t helper_fmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
     return float64_muladd(frs1, frs2, frs3, 0, &env->fp_status);
 }
 
+uint64_t helper_fmadd_h(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
+                        uint64_t frs3)
+{
+    return do_fmadd_h(env, frs1, frs2, frs3, 0);
+}
+
 uint64_t helper_fmsub_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
                         uint64_t frs3)
 {
@@ -115,6 +130,12 @@ uint64_t helper_fmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
                           &env->fp_status);
 }
 
+uint64_t helper_fmsub_h(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
+                        uint64_t frs3)
+{
+    return do_fmadd_h(env, frs1, frs2, frs3, float_muladd_negate_c);
+}
+
 uint64_t helper_fnmsub_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
                          uint64_t frs3)
 {
@@ -128,6 +149,12 @@ uint64_t helper_fnmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
                           &env->fp_status);
 }
 
+uint64_t helper_fnmsub_h(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
+                         uint64_t frs3)
+{
+    return do_fmadd_h(env, frs1, frs2, frs3, float_muladd_negate_product);
+}
+
 uint64_t helper_fnmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
                          uint64_t frs3)
 {
@@ -142,6 +169,13 @@ uint64_t helper_fnmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
                           float_muladd_negate_product, &env->fp_status);
 }
 
+uint64_t helper_fnmadd_h(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
+                         uint64_t frs3)
+{
+    return do_fmadd_h(env, frs1, frs2, frs3,
+                      float_muladd_negate_c | float_muladd_negate_product);
+}
+
 uint64_t helper_fadd_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
 {
     float32 frs1 = check_nanbox_s(rs1);
@@ -374,3 +408,149 @@ target_ulong helper_fclass_d(uint64_t frs1)
 {
     return fclass_d(frs1);
 }
+
+uint64_t helper_fadd_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+    float16 frs1 = check_nanbox_h(rs1);
+    float16 frs2 = check_nanbox_h(rs2);
+    return nanbox_h(float16_add(frs1, frs2, &env->fp_status));
+}
+
+uint64_t helper_fsub_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+    float16 frs1 = check_nanbox_h(rs1);
+    float16 frs2 = check_nanbox_h(rs2);
+    return nanbox_h(float16_sub(frs1, frs2, &env->fp_status));
+}
+
+uint64_t helper_fmul_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+    float16 frs1 = check_nanbox_h(rs1);
+    float16 frs2 = check_nanbox_h(rs2);
+    return nanbox_h(float16_mul(frs1, frs2, &env->fp_status));
+}
+
+uint64_t helper_fdiv_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+    float16 frs1 = check_nanbox_h(rs1);
+    float16 frs2 = check_nanbox_h(rs2);
+    return nanbox_h(float16_div(frs1, frs2, &env->fp_status));
+}
+
+uint64_t helper_fmin_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+    float16 frs1 = check_nanbox_h(rs1);
+    float16 frs2 = check_nanbox_h(rs2);
+    return nanbox_h(float16_minnum_noprop(frs1, frs2, &env->fp_status));
+}
+
+uint64_t helper_fmax_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+    float16 frs1 = check_nanbox_h(rs1);
+    float16 frs2 = check_nanbox_h(rs2);
+    return nanbox_h(float16_maxnum_noprop(frs1, frs2, &env->fp_status));
+}
+
+uint64_t helper_fsqrt_h(CPURISCVState *env, uint64_t rs1)
+{
+    float16 frs1 = check_nanbox_h(rs1);
+    return nanbox_h(float16_sqrt(frs1, &env->fp_status));
+}
+
+target_ulong helper_fle_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+    float16 frs1 = check_nanbox_h(rs1);
+    float16 frs2 = check_nanbox_h(rs2);
+    return float16_le(frs1, frs2, &env->fp_status);
+}
+
+target_ulong helper_flt_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+    float16 frs1 = check_nanbox_h(rs1);
+    float16 frs2 = check_nanbox_h(rs2);
+    return float16_lt(frs1, frs2, &env->fp_status);
+}
+
+target_ulong helper_feq_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
+{
+    float16 frs1 = check_nanbox_h(rs1);
+    float16 frs2 = check_nanbox_h(rs2);
+    return float16_eq_quiet(frs1, frs2, &env->fp_status);
+}
+
+target_ulong helper_fclass_h(uint64_t rs1)
+{
+    float16 frs1 = check_nanbox_h(rs1);
+    return fclass_h(frs1);
+}
+
+target_ulong helper_fcvt_w_h(CPURISCVState *env, uint64_t rs1)
+{
+    float16 frs1 = check_nanbox_h(rs1);
+    return float16_to_int32(frs1, &env->fp_status);
+}
+
+target_ulong helper_fcvt_wu_h(CPURISCVState *env, uint64_t rs1)
+{
+    float16 frs1 = check_nanbox_h(rs1);
+    return (int32_t)float16_to_uint32(frs1, &env->fp_status);
+}
+
+#if defined(TARGET_RISCV64)
+uint64_t helper_fcvt_l_h(CPURISCVState *env, uint64_t rs1)
+{
+    float16 frs1 = check_nanbox_h(rs1);
+    return float16_to_int64(frs1, &env->fp_status);
+}
+
+uint64_t helper_fcvt_lu_h(CPURISCVState *env, uint64_t rs1)
+{
+    float16 frs1 = check_nanbox_h(rs1);
+    return float16_to_uint64(frs1, &env->fp_status);
+}
+#endif
+
+uint64_t helper_fcvt_h_w(CPURISCVState *env, target_ulong rs1)
+{
+    return nanbox_h(int32_to_float16((int32_t)rs1, &env->fp_status));
+}
+
+uint64_t helper_fcvt_h_wu(CPURISCVState *env, target_ulong rs1)
+{
+    return nanbox_h(uint32_to_float16((uint32_t)rs1, &env->fp_status));
+}
+
+#if defined(TARGET_RISCV64)
+uint64_t helper_fcvt_h_l(CPURISCVState *env, uint64_t rs1)
+{
+    return nanbox_h(int64_to_float16(rs1, &env->fp_status));
+}
+
+uint64_t helper_fcvt_h_lu(CPURISCVState *env, uint64_t rs1)
+{
+    return nanbox_h(uint64_to_float16(rs1, &env->fp_status));
+}
+#endif
+
+uint64_t helper_fcvt_h_s(CPURISCVState *env, uint64_t rs1)
+{
+    float32 frs1 = check_nanbox_s(rs1);
+    return nanbox_h(float32_to_float16(frs1, true, &env->fp_status));
+}
+
+uint64_t helper_fcvt_s_h(CPURISCVState *env, uint64_t rs1)
+{
+    float16 frs1 = check_nanbox_h(rs1);
+    return nanbox_s(float16_to_float32(frs1, true, &env->fp_status));
+}
+
+uint64_t helper_fcvt_h_d(CPURISCVState *env, uint64_t rs1)
+{
+    return nanbox_h(float64_to_float16(rs1, true, &env->fp_status));
+}
+
+uint64_t helper_fcvt_d_h(CPURISCVState *env, uint64_t rs1)
+{
+    float16 frs1 = check_nanbox_h(rs1);
+    return float16_to_float64(frs1, true, &env->fp_status);
+}
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index acc2982..66c95dc 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -7,12 +7,16 @@ DEF_HELPER_FLAGS_2(set_rounding_mode, TCG_CALL_NO_WG, void, env, i32)
 /* Floating Point - fused */
 DEF_HELPER_FLAGS_4(fmadd_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
 DEF_HELPER_FLAGS_4(fmadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
+DEF_HELPER_FLAGS_4(fmadd_h, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
 DEF_HELPER_FLAGS_4(fmsub_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
 DEF_HELPER_FLAGS_4(fmsub_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
+DEF_HELPER_FLAGS_4(fmsub_h, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
 DEF_HELPER_FLAGS_4(fnmsub_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
 DEF_HELPER_FLAGS_4(fnmsub_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
+DEF_HELPER_FLAGS_4(fnmsub_h, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
 DEF_HELPER_FLAGS_4(fnmadd_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
 DEF_HELPER_FLAGS_4(fnmadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
+DEF_HELPER_FLAGS_4(fnmadd_h, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
 
 /* Floating Point - Single Precision */
 DEF_HELPER_FLAGS_3(fadd_s, TCG_CALL_NO_RWG, i64, env, i64, i64)
@@ -66,6 +70,36 @@ DEF_HELPER_FLAGS_2(fcvt_d_lu, TCG_CALL_NO_RWG, i64, env, tl)
 #endif
 DEF_HELPER_FLAGS_1(fclass_d, TCG_CALL_NO_RWG_SE, tl, i64)
 
+/* Floating Point - Half Precision */
+DEF_HELPER_FLAGS_3(fadd_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(fsub_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(fmul_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(fdiv_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(fmin_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(fmax_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_2(fsqrt_h, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_3(fle_h, TCG_CALL_NO_RWG, tl, env, i64, i64)
+DEF_HELPER_FLAGS_3(flt_h, TCG_CALL_NO_RWG, tl, env, i64, i64)
+DEF_HELPER_FLAGS_3(feq_h, TCG_CALL_NO_RWG, tl, env, i64, i64)
+DEF_HELPER_FLAGS_2(fcvt_s_h, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(fcvt_h_s, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(fcvt_d_h, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(fcvt_h_d, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(fcvt_w_h, TCG_CALL_NO_RWG, tl, env, i64)
+DEF_HELPER_FLAGS_2(fcvt_wu_h, TCG_CALL_NO_RWG, tl, env, i64)
+#if defined(TARGET_RISCV64)
+DEF_HELPER_FLAGS_2(fcvt_l_h, TCG_CALL_NO_RWG, tl, env, i64)
+DEF_HELPER_FLAGS_2(fcvt_lu_h, TCG_CALL_NO_RWG, tl, env, i64)
+#endif
+DEF_HELPER_FLAGS_2(fcvt_h_w, TCG_CALL_NO_RWG, i64, env, tl)
+DEF_HELPER_FLAGS_2(fcvt_h_wu, TCG_CALL_NO_RWG, i64, env, tl)
+#if defined(TARGET_RISCV64)
+DEF_HELPER_FLAGS_2(fcvt_h_l, TCG_CALL_NO_RWG, i64, env, tl)
+DEF_HELPER_FLAGS_2(fcvt_h_lu, TCG_CALL_NO_RWG, i64, env, tl)
+#endif
+DEF_HELPER_FLAGS_1(fclass_h, TCG_CALL_NO_RWG_SE, tl, i64)
+
+
 /* Special functions */
 DEF_HELPER_3(csrrw, tl, env, tl, tl)
 DEF_HELPER_4(csrrs, tl, env, tl, tl, tl)
diff --git a/target/riscv/insn32-64.decode b/target/riscv/insn32-64.decode
index 86153d9..f6f1ac9 100644
--- a/target/riscv/insn32-64.decode
+++ b/target/riscv/insn32-64.decode
@@ -81,3 +81,9 @@ fmv_x_d    1110001  00000 ..... 000 ..... 1010011 @r2
 fcvt_d_l   1101001  00010 ..... ... ..... 1010011 @r2_rm
 fcvt_d_lu  1101001  00011 ..... ... ..... 1010011 @r2_rm
 fmv_d_x    1111001  00000 ..... 000 ..... 1010011 @r2
+
+# *** RV64Zfh Standard Extension (in addition to RV32Zfh) ***
+fcvt_l_h   1100010  00010 ..... ... ..... 1010011 @r2_rm
+fcvt_lu_h  1100010  00011 ..... ... ..... 1010011 @r2_rm
+fcvt_h_l   1101010  00010 ..... ... ..... 1010011 @r2_rm
+fcvt_h_lu  1101010  00011 ..... ... ..... 1010011 @r2_rm
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index bdd8563..185fc4b 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -581,3 +581,35 @@ vcompress_vm    010111 - ..... ..... 010 ..... 1010111 @r
 
 vsetvli         0 ........... ..... 111 ..... 1010111  @r2_zimm
 vsetvl          1000000 ..... ..... 111 ..... 1010111  @r
+
+# *** RV32Zfh Extension ***
+flh        ............   ..... 001 ..... 0000111 @i
+fsh        .......  ..... ..... 001 ..... 0100111 @s
+fmadd_h    ..... 10 ..... ..... ... ..... 1000011 @r4_rm
+fmsub_h    ..... 10 ..... ..... ... ..... 1000111 @r4_rm
+fnmsub_h   ..... 10 ..... ..... ... ..... 1001011 @r4_rm
+fnmadd_h   ..... 10 ..... ..... ... ..... 1001111 @r4_rm
+fadd_h     0000010  ..... ..... ... ..... 1010011 @r_rm
+fsub_h     0000110  ..... ..... ... ..... 1010011 @r_rm
+fmul_h     0001010  ..... ..... ... ..... 1010011 @r_rm
+fdiv_h     0001110  ..... ..... ... ..... 1010011 @r_rm
+fsqrt_h    0101110  00000 ..... ... ..... 1010011 @r2_rm
+fsgnj_h    0010010  ..... ..... 000 ..... 1010011 @r
+fsgnjn_h   0010010  ..... ..... 001 ..... 1010011 @r
+fsgnjx_h   0010010  ..... ..... 010 ..... 1010011 @r
+fmin_h     0010110  ..... ..... 000 ..... 1010011 @r
+fmax_h     0010110  ..... ..... 001 ..... 1010011 @r
+fcvt_h_s   0100010  00000 ..... ... ..... 1010011 @r2_rm
+fcvt_s_h   0100000  00010 ..... ... ..... 1010011 @r2_rm
+fcvt_h_d   0100010  00001 ..... ... ..... 1010011 @r2_rm
+fcvt_d_h   0100001  00010 ..... ... ..... 1010011 @r2_rm
+fcvt_w_h   1100010  00000 ..... ... ..... 1010011 @r2_rm
+fcvt_wu_h  1100010  00001 ..... ... ..... 1010011 @r2_rm
+fmv_x_h    1110010  00000 ..... 000 ..... 1010011 @r2
+feq_h      1010010  ..... ..... 010 ..... 1010011 @r
+flt_h      1010010  ..... ..... 001 ..... 1010011 @r
+fle_h      1010010  ..... ..... 000 ..... 1010011 @r
+fclass_h   1110010  00000 ..... 001 ..... 1010011 @r2
+fcvt_h_w   1101010  00000 ..... ... ..... 1010011 @r2_rm
+fcvt_h_wu  1101010  00001 ..... ... ..... 1010011 @r2_rm
+fmv_h_x    1111010  00000 ..... 000 ..... 1010011 @r2
diff --git a/target/riscv/insn_trans/trans_rvzfh.inc.c b/target/riscv/insn_trans/trans_rvzfh.inc.c
new file mode 100644
index 0000000..1b4dede
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvzfh.inc.c
@@ -0,0 +1,531 @@
+/*
+ * RISC-V translation routines for the RV64Zfh Standard Extension.
+ *
+ * Copyright (c) 2020 SiFive, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+static bool trans_flh(DisasContext *ctx, arg_flh *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    TCGv t0 = tcg_temp_new();
+    gen_get_gpr(t0, a->rs1);
+
+    tcg_gen_addi_tl(t0, t0, a->imm);
+
+    tcg_gen_qemu_ld_i64(cpu_fpr[a->rd], t0, ctx->mem_idx, MO_TEUW);
+
+    gen_nanbox_h(cpu_fpr[a->rd], cpu_fpr[a->rd]);
+
+    mark_fs_dirty(ctx);
+    tcg_temp_free(t0);
+    return true;
+}
+
+static bool trans_fsh(DisasContext *ctx, arg_fsh *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    TCGv t0 = tcg_temp_new();
+    gen_get_gpr(t0, a->rs1);
+    tcg_gen_addi_tl(t0, t0, a->imm);
+
+    tcg_gen_qemu_st_i64(cpu_fpr[a->rs2], t0, ctx->mem_idx, MO_TEUW);
+
+    mark_fs_dirty(ctx);
+    tcg_temp_free(t0);
+    return true;
+}
+
+static bool trans_fmadd_h(DisasContext *ctx, arg_fmadd_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fmadd_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+                       cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fmsub_h(DisasContext *ctx, arg_fmsub_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fmsub_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+                       cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fnmsub_h(DisasContext *ctx, arg_fnmsub_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fnmsub_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+                        cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fnmadd_h(DisasContext *ctx, arg_fnmadd_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fnmadd_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+                        cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fadd_h(DisasContext *ctx, arg_fadd_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fadd_h(cpu_fpr[a->rd], cpu_env,
+                      cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fsub_h(DisasContext *ctx, arg_fsub_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fsub_h(cpu_fpr[a->rd], cpu_env,
+                      cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fmul_h(DisasContext *ctx, arg_fmul_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fmul_h(cpu_fpr[a->rd], cpu_env,
+                      cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fdiv_h(DisasContext *ctx, arg_fdiv_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fdiv_h(cpu_fpr[a->rd], cpu_env,
+                      cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fsqrt_h(DisasContext *ctx, arg_fsqrt_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fsqrt_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fsgnj_h(DisasContext *ctx, arg_fsgnj_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    if (a->rs1 == a->rs2) { /* FMOV */
+        gen_check_nanbox_h(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
+    } else {
+        TCGv_i64 rs1 = tcg_temp_new_i64();
+        TCGv_i64 rs2 = tcg_temp_new_i64();
+
+        gen_check_nanbox_h(rs1, cpu_fpr[a->rs1]);
+        gen_check_nanbox_h(rs2, cpu_fpr[a->rs2]);
+
+        /* This formulation retains the nanboxing of rs2. */
+        tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 15);
+        tcg_temp_free_i64(rs1);
+        tcg_temp_free_i64(rs2);
+    }
+
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fsgnjn_h(DisasContext *ctx, arg_fsgnjn_h *a)
+{
+    TCGv_i64 rs1, rs2, mask;
+
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    rs1 = tcg_temp_new_i64();
+    gen_check_nanbox_h(rs1, cpu_fpr[a->rs1]);
+
+    if (a->rs1 == a->rs2) { /* FNEG */
+        tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(15, 1));
+    } else {
+        rs2 = tcg_temp_new_i64();
+        gen_check_nanbox_h(rs2, cpu_fpr[a->rs2]);
+
+        /*
+         * Replace bit 15 in rs1 with inverse in rs2.
+         * This formulation retains the nanboxing of rs1.
+         */
+        mask = tcg_const_i64(~MAKE_64BIT_MASK(15, 1));
+        tcg_gen_not_i64(rs2, rs2);
+        tcg_gen_andc_i64(rs2, rs2, mask);
+        tcg_gen_and_i64(rs1, mask, rs1);
+        tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);
+
+        tcg_temp_free_i64(mask);
+        tcg_temp_free_i64(rs2);
+    }
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fsgnjx_h(DisasContext *ctx, arg_fsgnjx_h *a)
+{
+    TCGv_i64 rs1, rs2;
+
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    rs1 = tcg_temp_new_i64();
+    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
+
+    if (a->rs1 == a->rs2) { /* FABS */
+        tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(15, 1));
+    } else {
+        rs2 = tcg_temp_new_i64();
+        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
+
+        /*
+         * Xor bit 15 in rs1 with that in rs2.
+         * This formulation retains the nanboxing of rs1.
+         */
+        tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(15, 1));
+        tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2);
+
+        tcg_temp_free_i64(rs2);
+    }
+
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fmin_h(DisasContext *ctx, arg_fmin_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    gen_helper_fmin_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+                      cpu_fpr[a->rs2]);
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fmax_h(DisasContext *ctx, arg_fmax_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    gen_helper_fmax_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+                      cpu_fpr[a->rs2]);
+    mark_fs_dirty(ctx);
+    return true;
+}
+
+static bool trans_fcvt_s_h(DisasContext *ctx, arg_fcvt_s_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fcvt_s_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+
+    mark_fs_dirty(ctx);
+
+    return true;
+}
+
+static bool trans_fcvt_d_h(DisasContext *ctx, arg_fcvt_d_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+    REQUIRE_EXT(ctx, RVD);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fcvt_d_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+
+    mark_fs_dirty(ctx);
+
+
+    return true;
+}
+
+static bool trans_fcvt_h_s(DisasContext *ctx, arg_fcvt_h_s *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fcvt_h_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+
+    mark_fs_dirty(ctx);
+
+    return true;
+}
+
+static bool trans_fcvt_h_d(DisasContext *ctx, arg_fcvt_h_d *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+    REQUIRE_EXT(ctx, RVD);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fcvt_h_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+
+    mark_fs_dirty(ctx);
+
+    return true;
+}
+
+static bool trans_feq_h(DisasContext *ctx, arg_feq_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+    TCGv t0 = tcg_temp_new();
+    gen_helper_feq_h(t0, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+    gen_set_gpr(a->rd, t0);
+    tcg_temp_free(t0);
+    return true;
+}
+
+static bool trans_flt_h(DisasContext *ctx, arg_flt_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+    TCGv t0 = tcg_temp_new();
+    gen_helper_flt_h(t0, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+    gen_set_gpr(a->rd, t0);
+    tcg_temp_free(t0);
+    return true;
+}
+
+static bool trans_fle_h(DisasContext *ctx, arg_fle_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+    TCGv t0 = tcg_temp_new();
+    gen_helper_fle_h(t0, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+    gen_set_gpr(a->rd, t0);
+    tcg_temp_free(t0);
+    return true;
+}
+
+static bool trans_fclass_h(DisasContext *ctx, arg_fclass_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    TCGv t0 = tcg_temp_new();
+
+    gen_helper_fclass_h(t0, cpu_fpr[a->rs1]);
+
+    gen_set_gpr(a->rd, t0);
+    tcg_temp_free(t0);
+
+    return true;
+}
+
+static bool trans_fcvt_w_h(DisasContext *ctx, arg_fcvt_w_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    TCGv t0 = tcg_temp_new();
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fcvt_w_h(t0, cpu_env, cpu_fpr[a->rs1]);
+    gen_set_gpr(a->rd, t0);
+    tcg_temp_free(t0);
+
+    return true;
+}
+
+static bool trans_fcvt_wu_h(DisasContext *ctx, arg_fcvt_wu_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    TCGv t0 = tcg_temp_new();
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fcvt_wu_h(t0, cpu_env, cpu_fpr[a->rs1]);
+    gen_set_gpr(a->rd, t0);
+    tcg_temp_free(t0);
+
+    return true;
+}
+
+static bool trans_fcvt_h_w(DisasContext *ctx, arg_fcvt_h_w *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    TCGv t0 = tcg_temp_new();
+    gen_get_gpr(t0, a->rs1);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fcvt_h_w(cpu_fpr[a->rd], cpu_env, t0);
+
+    mark_fs_dirty(ctx);
+    tcg_temp_free(t0);
+
+    return true;
+}
+
+static bool trans_fcvt_h_wu(DisasContext *ctx, arg_fcvt_h_wu *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    TCGv t0 = tcg_temp_new();
+    gen_get_gpr(t0, a->rs1);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fcvt_h_wu(cpu_fpr[a->rd], cpu_env, t0);
+
+    mark_fs_dirty(ctx);
+    tcg_temp_free(t0);
+
+    return true;
+}
+
+static bool trans_fmv_x_h(DisasContext *ctx, arg_fmv_x_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    TCGv t0 = tcg_temp_new();
+
+#if defined(TARGET_RISCV64)
+    tcg_gen_ext16s_tl(t0, cpu_fpr[a->rs1]); // 16 bits->64 bits
+#else
+    tcg_gen_extrl_i64_i32(t0, cpu_fpr[a->rs1]); //16 bits->32 bits
+    tcg_gen_ext16s_tl(t0, t0);
+#endif
+
+    gen_set_gpr(a->rd, t0);
+    tcg_temp_free(t0);
+
+    return true;
+}
+
+static bool trans_fmv_h_x(DisasContext *ctx, arg_fmv_h_x *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    TCGv t0 = tcg_temp_new();
+    gen_get_gpr(t0, a->rs1);
+
+    tcg_gen_extu_tl_i64(cpu_fpr[a->rd], t0);
+    gen_nanbox_h(cpu_fpr[a->rd], cpu_fpr[a->rd]);
+
+    mark_fs_dirty(ctx);
+    tcg_temp_free(t0);
+
+    return true;
+}
+
+#ifdef TARGET_RISCV64
+
+static bool trans_fcvt_l_h(DisasContext *ctx, arg_fcvt_l_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    TCGv t0 = tcg_temp_new();
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fcvt_l_h(t0, cpu_env, cpu_fpr[a->rs1]);
+    gen_set_gpr(a->rd, t0);
+    tcg_temp_free(t0);
+
+    return true;
+}
+
+static bool trans_fcvt_lu_h(DisasContext *ctx, arg_fcvt_lu_h *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    TCGv t0 = tcg_temp_new();
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fcvt_lu_h(t0, cpu_env, cpu_fpr[a->rs1]);
+    gen_set_gpr(a->rd, t0);
+    tcg_temp_free(t0);
+
+    return true;
+}
+
+static bool trans_fcvt_h_l(DisasContext *ctx, arg_fcvt_h_l *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    TCGv t0 = tcg_temp_new();
+    gen_get_gpr(t0, a->rs1);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fcvt_h_l(cpu_fpr[a->rd], cpu_env, t0);
+
+    mark_fs_dirty(ctx);
+    tcg_temp_free(t0);
+
+    return true;
+}
+
+static bool trans_fcvt_h_lu(DisasContext *ctx, arg_fcvt_h_lu *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVZfh);
+
+    TCGv t0 = tcg_temp_new();
+    gen_get_gpr(t0, a->rs1);
+
+    gen_set_rm(ctx, a->rm);
+    gen_helper_fcvt_h_lu(cpu_fpr[a->rd], cpu_env, t0);
+
+    mark_fs_dirty(ctx);
+    tcg_temp_free(t0);
+
+    return true;
+}
+#endif
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index e227534..6dbd8c9 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -769,6 +769,7 @@ static bool gen_shift(DisasContext *ctx, arg_r *a,
 #include "insn_trans/trans_rvd.inc.c"
 #include "insn_trans/trans_rvh.inc.c"
 #include "insn_trans/trans_rvv.inc.c"
+#include "insn_trans/trans_rvzfh.inc.c"
 #include "insn_trans/trans_privileged.inc.c"
 
 /* Include the auto-generated decoder for 16 bit insn */
-- 
2.7.4



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/3] target/riscv: support 'x-k' in cpu option
  2020-08-11  8:59 [PATCH 0/3] RFC: target/riscv: add half-precision floating-point extension Chih-Min Chao
  2020-08-11  8:59 ` [PATCH 1/3] target/riscv: add NaN-Boxing helper for half-float Chih-Min Chao
  2020-08-11  8:59 ` [PATCH 2/3] target/riscv: Implement zfh extension Chih-Min Chao
@ 2020-08-11  8:59 ` Chih-Min Chao
  2 siblings, 0 replies; 5+ messages in thread
From: Chih-Min Chao @ 2020-08-11  8:59 UTC (permalink / raw)
  To: qemu-devel, qemu-riscv
  Cc: Chih-Min Chao, Alistair Francis, Palmer Dabbelt,
	Sagar Karandikar, Bastian Koppelmann

zfh extension is still a draft and is not included in master spec. The
related field, 'k', in MISA is also undecided but has been discussed in
reference link.

  ref:
     zfh spec: https://github.com/riscv/riscv-isa-manual/tree/zfh
     misa field: https://github.com/riscv/riscv-isa-manual/issues/414

Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
---
 target/riscv/cpu.c | 4 ++++
 target/riscv/cpu.h | 1 +
 2 files changed, 5 insertions(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 228b9bd..8ad8782 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -469,6 +469,9 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
             }
             set_vext_version(env, vext_version);
         }
+        if (cpu->cfg.ext_k) {
+            target_misa |= RVZfh;
+        }
 
         set_misa(env, RVXLEN | target_misa);
     }
@@ -509,6 +512,7 @@ static Property riscv_cpu_properties[] = {
     /* This is experimental so mark with 'x-' */
     DEFINE_PROP_BOOL("x-h", RISCVCPU, cfg.ext_h, false),
     DEFINE_PROP_BOOL("x-v", RISCVCPU, cfg.ext_v, false),
+    DEFINE_PROP_BOOL("x-k", RISCVCPU, cfg.ext_k, false),
     DEFINE_PROP_BOOL("Counters", RISCVCPU, cfg.ext_counters, true),
     DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true),
     DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 6a077c1..bdef2a8 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -281,6 +281,7 @@ typedef struct RISCVCPU {
         bool ext_u;
         bool ext_h;
         bool ext_v;
+        bool ext_k;
         bool ext_counters;
         bool ext_ifencei;
         bool ext_icsr;
-- 
2.7.4



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/3] target/riscv: Implement zfh extension
  2020-08-11  8:59 ` [PATCH 2/3] target/riscv: Implement zfh extension Chih-Min Chao
@ 2020-08-25 19:00   ` Alistair Francis
  0 siblings, 0 replies; 5+ messages in thread
From: Alistair Francis @ 2020-08-25 19:00 UTC (permalink / raw)
  To: Chih-Min Chao
  Cc: open list:RISC-V, Sagar Karandikar, Bastian Koppelmann,
	qemu-devel@nongnu.org Developers, Alistair Francis,
	Palmer Dabbelt, Kito Cheng

On Tue, Aug 11, 2020 at 2:00 AM Chih-Min Chao <chihmin.chao@sifive.com> wrote:
>
> From: Kito Cheng <kito.cheng@sifive.com>
>
> All of instructions listed in spec has been implemented. The spec is still
> under discussion and not included master branch.

QEMU is happy to accept draft specs, but we need the draft spec at
least somewhat finalised. What that means here is that this probably
has to wait until the draft spec is merged into the master branch of
the RISC-V ISA spec.

Also, the QEMU implementation will need to be fully implemented, at
least all of the mandatory parts of the spec (optional parts can be
left out).

I had a quick skim through this patch and it looks good. Maybe it will
be worth splitting it up a bit more especially if the final
implementation will be larger.

Alistair

>
> ref:
>   https://github.com/riscv/riscv-isa-manual/tree/zfh
>
> Signed-off-by: Kito Cheng <kito.cheng@sifive.com>
> Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
> ---
>  target/riscv/cpu.h                        |   1 +
>  target/riscv/fpu_helper.c                 | 180 ++++++++++
>  target/riscv/helper.h                     |  34 ++
>  target/riscv/insn32-64.decode             |   6 +
>  target/riscv/insn32.decode                |  32 ++
>  target/riscv/insn_trans/trans_rvzfh.inc.c | 531 ++++++++++++++++++++++++++++++
>  target/riscv/translate.c                  |   1 +
>  7 files changed, 785 insertions(+)
>  create mode 100644 target/riscv/insn_trans/trans_rvzfh.inc.c
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index a804a5d..6a077c1 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -65,6 +65,7 @@
>  #define RVS RV('S')
>  #define RVU RV('U')
>  #define RVH RV('H')
> +#define RVZfh RV('K')
>
>  /* S extension denotes that Supervisor mode exists, however it is possible
>     to have a core that support S mode but does not have an MMU and there
> diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c
> index 2c52ff1..5b1b210 100644
> --- a/target/riscv/fpu_helper.c
> +++ b/target/riscv/fpu_helper.c
> @@ -81,6 +81,15 @@ void helper_set_rounding_mode(CPURISCVState *env, uint32_t rm)
>      set_float_rounding_mode(softrm, &env->fp_status);
>  }
>
> +static uint64_t do_fmadd_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2,
> +                           uint64_t rs3, int flags)
> +{
> +    float16 frs1 = check_nanbox_h(rs1);
> +    float16 frs2 = check_nanbox_h(rs2);
> +    float16 frs3 = check_nanbox_h(rs3);
> +    return nanbox_h(float16_muladd(frs1, frs2, frs3, flags, &env->fp_status));
> +}
> +
>  static uint64_t do_fmadd_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2,
>                             uint64_t rs3, int flags)
>  {
> @@ -102,6 +111,12 @@ uint64_t helper_fmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
>      return float64_muladd(frs1, frs2, frs3, 0, &env->fp_status);
>  }
>
> +uint64_t helper_fmadd_h(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
> +                        uint64_t frs3)
> +{
> +    return do_fmadd_h(env, frs1, frs2, frs3, 0);
> +}
> +
>  uint64_t helper_fmsub_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
>                          uint64_t frs3)
>  {
> @@ -115,6 +130,12 @@ uint64_t helper_fmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
>                            &env->fp_status);
>  }
>
> +uint64_t helper_fmsub_h(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
> +                        uint64_t frs3)
> +{
> +    return do_fmadd_h(env, frs1, frs2, frs3, float_muladd_negate_c);
> +}
> +
>  uint64_t helper_fnmsub_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
>                           uint64_t frs3)
>  {
> @@ -128,6 +149,12 @@ uint64_t helper_fnmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
>                            &env->fp_status);
>  }
>
> +uint64_t helper_fnmsub_h(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
> +                         uint64_t frs3)
> +{
> +    return do_fmadd_h(env, frs1, frs2, frs3, float_muladd_negate_product);
> +}
> +
>  uint64_t helper_fnmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
>                           uint64_t frs3)
>  {
> @@ -142,6 +169,13 @@ uint64_t helper_fnmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
>                            float_muladd_negate_product, &env->fp_status);
>  }
>
> +uint64_t helper_fnmadd_h(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
> +                         uint64_t frs3)
> +{
> +    return do_fmadd_h(env, frs1, frs2, frs3,
> +                      float_muladd_negate_c | float_muladd_negate_product);
> +}
> +
>  uint64_t helper_fadd_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
>  {
>      float32 frs1 = check_nanbox_s(rs1);
> @@ -374,3 +408,149 @@ target_ulong helper_fclass_d(uint64_t frs1)
>  {
>      return fclass_d(frs1);
>  }
> +
> +uint64_t helper_fadd_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
> +{
> +    float16 frs1 = check_nanbox_h(rs1);
> +    float16 frs2 = check_nanbox_h(rs2);
> +    return nanbox_h(float16_add(frs1, frs2, &env->fp_status));
> +}
> +
> +uint64_t helper_fsub_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
> +{
> +    float16 frs1 = check_nanbox_h(rs1);
> +    float16 frs2 = check_nanbox_h(rs2);
> +    return nanbox_h(float16_sub(frs1, frs2, &env->fp_status));
> +}
> +
> +uint64_t helper_fmul_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
> +{
> +    float16 frs1 = check_nanbox_h(rs1);
> +    float16 frs2 = check_nanbox_h(rs2);
> +    return nanbox_h(float16_mul(frs1, frs2, &env->fp_status));
> +}
> +
> +uint64_t helper_fdiv_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
> +{
> +    float16 frs1 = check_nanbox_h(rs1);
> +    float16 frs2 = check_nanbox_h(rs2);
> +    return nanbox_h(float16_div(frs1, frs2, &env->fp_status));
> +}
> +
> +uint64_t helper_fmin_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
> +{
> +    float16 frs1 = check_nanbox_h(rs1);
> +    float16 frs2 = check_nanbox_h(rs2);
> +    return nanbox_h(float16_minnum_noprop(frs1, frs2, &env->fp_status));
> +}
> +
> +uint64_t helper_fmax_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
> +{
> +    float16 frs1 = check_nanbox_h(rs1);
> +    float16 frs2 = check_nanbox_h(rs2);
> +    return nanbox_h(float16_maxnum_noprop(frs1, frs2, &env->fp_status));
> +}
> +
> +uint64_t helper_fsqrt_h(CPURISCVState *env, uint64_t rs1)
> +{
> +    float16 frs1 = check_nanbox_h(rs1);
> +    return nanbox_h(float16_sqrt(frs1, &env->fp_status));
> +}
> +
> +target_ulong helper_fle_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
> +{
> +    float16 frs1 = check_nanbox_h(rs1);
> +    float16 frs2 = check_nanbox_h(rs2);
> +    return float16_le(frs1, frs2, &env->fp_status);
> +}
> +
> +target_ulong helper_flt_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
> +{
> +    float16 frs1 = check_nanbox_h(rs1);
> +    float16 frs2 = check_nanbox_h(rs2);
> +    return float16_lt(frs1, frs2, &env->fp_status);
> +}
> +
> +target_ulong helper_feq_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
> +{
> +    float16 frs1 = check_nanbox_h(rs1);
> +    float16 frs2 = check_nanbox_h(rs2);
> +    return float16_eq_quiet(frs1, frs2, &env->fp_status);
> +}
> +
> +target_ulong helper_fclass_h(uint64_t rs1)
> +{
> +    float16 frs1 = check_nanbox_h(rs1);
> +    return fclass_h(frs1);
> +}
> +
> +target_ulong helper_fcvt_w_h(CPURISCVState *env, uint64_t rs1)
> +{
> +    float16 frs1 = check_nanbox_h(rs1);
> +    return float16_to_int32(frs1, &env->fp_status);
> +}
> +
> +target_ulong helper_fcvt_wu_h(CPURISCVState *env, uint64_t rs1)
> +{
> +    float16 frs1 = check_nanbox_h(rs1);
> +    return (int32_t)float16_to_uint32(frs1, &env->fp_status);
> +}
> +
> +#if defined(TARGET_RISCV64)
> +uint64_t helper_fcvt_l_h(CPURISCVState *env, uint64_t rs1)
> +{
> +    float16 frs1 = check_nanbox_h(rs1);
> +    return float16_to_int64(frs1, &env->fp_status);
> +}
> +
> +uint64_t helper_fcvt_lu_h(CPURISCVState *env, uint64_t rs1)
> +{
> +    float16 frs1 = check_nanbox_h(rs1);
> +    return float16_to_uint64(frs1, &env->fp_status);
> +}
> +#endif
> +
> +uint64_t helper_fcvt_h_w(CPURISCVState *env, target_ulong rs1)
> +{
> +    return nanbox_h(int32_to_float16((int32_t)rs1, &env->fp_status));
> +}
> +
> +uint64_t helper_fcvt_h_wu(CPURISCVState *env, target_ulong rs1)
> +{
> +    return nanbox_h(uint32_to_float16((uint32_t)rs1, &env->fp_status));
> +}
> +
> +#if defined(TARGET_RISCV64)
> +uint64_t helper_fcvt_h_l(CPURISCVState *env, uint64_t rs1)
> +{
> +    return nanbox_h(int64_to_float16(rs1, &env->fp_status));
> +}
> +
> +uint64_t helper_fcvt_h_lu(CPURISCVState *env, uint64_t rs1)
> +{
> +    return nanbox_h(uint64_to_float16(rs1, &env->fp_status));
> +}
> +#endif
> +
> +uint64_t helper_fcvt_h_s(CPURISCVState *env, uint64_t rs1)
> +{
> +    float32 frs1 = check_nanbox_s(rs1);
> +    return nanbox_h(float32_to_float16(frs1, true, &env->fp_status));
> +}
> +
> +uint64_t helper_fcvt_s_h(CPURISCVState *env, uint64_t rs1)
> +{
> +    float16 frs1 = check_nanbox_h(rs1);
> +    return nanbox_s(float16_to_float32(frs1, true, &env->fp_status));
> +}
> +
> +uint64_t helper_fcvt_h_d(CPURISCVState *env, uint64_t rs1)
> +{
> +    return nanbox_h(float64_to_float16(rs1, true, &env->fp_status));
> +}
> +
> +uint64_t helper_fcvt_d_h(CPURISCVState *env, uint64_t rs1)
> +{
> +    float16 frs1 = check_nanbox_h(rs1);
> +    return float16_to_float64(frs1, true, &env->fp_status);
> +}
> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
> index acc2982..66c95dc 100644
> --- a/target/riscv/helper.h
> +++ b/target/riscv/helper.h
> @@ -7,12 +7,16 @@ DEF_HELPER_FLAGS_2(set_rounding_mode, TCG_CALL_NO_WG, void, env, i32)
>  /* Floating Point - fused */
>  DEF_HELPER_FLAGS_4(fmadd_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
>  DEF_HELPER_FLAGS_4(fmadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
> +DEF_HELPER_FLAGS_4(fmadd_h, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
>  DEF_HELPER_FLAGS_4(fmsub_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
>  DEF_HELPER_FLAGS_4(fmsub_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
> +DEF_HELPER_FLAGS_4(fmsub_h, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
>  DEF_HELPER_FLAGS_4(fnmsub_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
>  DEF_HELPER_FLAGS_4(fnmsub_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
> +DEF_HELPER_FLAGS_4(fnmsub_h, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
>  DEF_HELPER_FLAGS_4(fnmadd_s, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
>  DEF_HELPER_FLAGS_4(fnmadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
> +DEF_HELPER_FLAGS_4(fnmadd_h, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
>
>  /* Floating Point - Single Precision */
>  DEF_HELPER_FLAGS_3(fadd_s, TCG_CALL_NO_RWG, i64, env, i64, i64)
> @@ -66,6 +70,36 @@ DEF_HELPER_FLAGS_2(fcvt_d_lu, TCG_CALL_NO_RWG, i64, env, tl)
>  #endif
>  DEF_HELPER_FLAGS_1(fclass_d, TCG_CALL_NO_RWG_SE, tl, i64)
>
> +/* Floating Point - Half Precision */
> +DEF_HELPER_FLAGS_3(fadd_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
> +DEF_HELPER_FLAGS_3(fsub_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
> +DEF_HELPER_FLAGS_3(fmul_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
> +DEF_HELPER_FLAGS_3(fdiv_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
> +DEF_HELPER_FLAGS_3(fmin_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
> +DEF_HELPER_FLAGS_3(fmax_h, TCG_CALL_NO_RWG, i64, env, i64, i64)
> +DEF_HELPER_FLAGS_2(fsqrt_h, TCG_CALL_NO_RWG, i64, env, i64)
> +DEF_HELPER_FLAGS_3(fle_h, TCG_CALL_NO_RWG, tl, env, i64, i64)
> +DEF_HELPER_FLAGS_3(flt_h, TCG_CALL_NO_RWG, tl, env, i64, i64)
> +DEF_HELPER_FLAGS_3(feq_h, TCG_CALL_NO_RWG, tl, env, i64, i64)
> +DEF_HELPER_FLAGS_2(fcvt_s_h, TCG_CALL_NO_RWG, i64, env, i64)
> +DEF_HELPER_FLAGS_2(fcvt_h_s, TCG_CALL_NO_RWG, i64, env, i64)
> +DEF_HELPER_FLAGS_2(fcvt_d_h, TCG_CALL_NO_RWG, i64, env, i64)
> +DEF_HELPER_FLAGS_2(fcvt_h_d, TCG_CALL_NO_RWG, i64, env, i64)
> +DEF_HELPER_FLAGS_2(fcvt_w_h, TCG_CALL_NO_RWG, tl, env, i64)
> +DEF_HELPER_FLAGS_2(fcvt_wu_h, TCG_CALL_NO_RWG, tl, env, i64)
> +#if defined(TARGET_RISCV64)
> +DEF_HELPER_FLAGS_2(fcvt_l_h, TCG_CALL_NO_RWG, tl, env, i64)
> +DEF_HELPER_FLAGS_2(fcvt_lu_h, TCG_CALL_NO_RWG, tl, env, i64)
> +#endif
> +DEF_HELPER_FLAGS_2(fcvt_h_w, TCG_CALL_NO_RWG, i64, env, tl)
> +DEF_HELPER_FLAGS_2(fcvt_h_wu, TCG_CALL_NO_RWG, i64, env, tl)
> +#if defined(TARGET_RISCV64)
> +DEF_HELPER_FLAGS_2(fcvt_h_l, TCG_CALL_NO_RWG, i64, env, tl)
> +DEF_HELPER_FLAGS_2(fcvt_h_lu, TCG_CALL_NO_RWG, i64, env, tl)
> +#endif
> +DEF_HELPER_FLAGS_1(fclass_h, TCG_CALL_NO_RWG_SE, tl, i64)
> +
> +
>  /* Special functions */
>  DEF_HELPER_3(csrrw, tl, env, tl, tl)
>  DEF_HELPER_4(csrrs, tl, env, tl, tl, tl)
> diff --git a/target/riscv/insn32-64.decode b/target/riscv/insn32-64.decode
> index 86153d9..f6f1ac9 100644
> --- a/target/riscv/insn32-64.decode
> +++ b/target/riscv/insn32-64.decode
> @@ -81,3 +81,9 @@ fmv_x_d    1110001  00000 ..... 000 ..... 1010011 @r2
>  fcvt_d_l   1101001  00010 ..... ... ..... 1010011 @r2_rm
>  fcvt_d_lu  1101001  00011 ..... ... ..... 1010011 @r2_rm
>  fmv_d_x    1111001  00000 ..... 000 ..... 1010011 @r2
> +
> +# *** RV64Zfh Standard Extension (in addition to RV32Zfh) ***
> +fcvt_l_h   1100010  00010 ..... ... ..... 1010011 @r2_rm
> +fcvt_lu_h  1100010  00011 ..... ... ..... 1010011 @r2_rm
> +fcvt_h_l   1101010  00010 ..... ... ..... 1010011 @r2_rm
> +fcvt_h_lu  1101010  00011 ..... ... ..... 1010011 @r2_rm
> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
> index bdd8563..185fc4b 100644
> --- a/target/riscv/insn32.decode
> +++ b/target/riscv/insn32.decode
> @@ -581,3 +581,35 @@ vcompress_vm    010111 - ..... ..... 010 ..... 1010111 @r
>
>  vsetvli         0 ........... ..... 111 ..... 1010111  @r2_zimm
>  vsetvl          1000000 ..... ..... 111 ..... 1010111  @r
> +
> +# *** RV32Zfh Extension ***
> +flh        ............   ..... 001 ..... 0000111 @i
> +fsh        .......  ..... ..... 001 ..... 0100111 @s
> +fmadd_h    ..... 10 ..... ..... ... ..... 1000011 @r4_rm
> +fmsub_h    ..... 10 ..... ..... ... ..... 1000111 @r4_rm
> +fnmsub_h   ..... 10 ..... ..... ... ..... 1001011 @r4_rm
> +fnmadd_h   ..... 10 ..... ..... ... ..... 1001111 @r4_rm
> +fadd_h     0000010  ..... ..... ... ..... 1010011 @r_rm
> +fsub_h     0000110  ..... ..... ... ..... 1010011 @r_rm
> +fmul_h     0001010  ..... ..... ... ..... 1010011 @r_rm
> +fdiv_h     0001110  ..... ..... ... ..... 1010011 @r_rm
> +fsqrt_h    0101110  00000 ..... ... ..... 1010011 @r2_rm
> +fsgnj_h    0010010  ..... ..... 000 ..... 1010011 @r
> +fsgnjn_h   0010010  ..... ..... 001 ..... 1010011 @r
> +fsgnjx_h   0010010  ..... ..... 010 ..... 1010011 @r
> +fmin_h     0010110  ..... ..... 000 ..... 1010011 @r
> +fmax_h     0010110  ..... ..... 001 ..... 1010011 @r
> +fcvt_h_s   0100010  00000 ..... ... ..... 1010011 @r2_rm
> +fcvt_s_h   0100000  00010 ..... ... ..... 1010011 @r2_rm
> +fcvt_h_d   0100010  00001 ..... ... ..... 1010011 @r2_rm
> +fcvt_d_h   0100001  00010 ..... ... ..... 1010011 @r2_rm
> +fcvt_w_h   1100010  00000 ..... ... ..... 1010011 @r2_rm
> +fcvt_wu_h  1100010  00001 ..... ... ..... 1010011 @r2_rm
> +fmv_x_h    1110010  00000 ..... 000 ..... 1010011 @r2
> +feq_h      1010010  ..... ..... 010 ..... 1010011 @r
> +flt_h      1010010  ..... ..... 001 ..... 1010011 @r
> +fle_h      1010010  ..... ..... 000 ..... 1010011 @r
> +fclass_h   1110010  00000 ..... 001 ..... 1010011 @r2
> +fcvt_h_w   1101010  00000 ..... ... ..... 1010011 @r2_rm
> +fcvt_h_wu  1101010  00001 ..... ... ..... 1010011 @r2_rm
> +fmv_h_x    1111010  00000 ..... 000 ..... 1010011 @r2
> diff --git a/target/riscv/insn_trans/trans_rvzfh.inc.c b/target/riscv/insn_trans/trans_rvzfh.inc.c
> new file mode 100644
> index 0000000..1b4dede
> --- /dev/null
> +++ b/target/riscv/insn_trans/trans_rvzfh.inc.c
> @@ -0,0 +1,531 @@
> +/*
> + * RISC-V translation routines for the RV64Zfh Standard Extension.
> + *
> + * Copyright (c) 2020 SiFive, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2 or later, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +static bool trans_flh(DisasContext *ctx, arg_flh *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    TCGv t0 = tcg_temp_new();
> +    gen_get_gpr(t0, a->rs1);
> +
> +    tcg_gen_addi_tl(t0, t0, a->imm);
> +
> +    tcg_gen_qemu_ld_i64(cpu_fpr[a->rd], t0, ctx->mem_idx, MO_TEUW);
> +
> +    gen_nanbox_h(cpu_fpr[a->rd], cpu_fpr[a->rd]);
> +
> +    mark_fs_dirty(ctx);
> +    tcg_temp_free(t0);
> +    return true;
> +}
> +
> +static bool trans_fsh(DisasContext *ctx, arg_fsh *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    TCGv t0 = tcg_temp_new();
> +    gen_get_gpr(t0, a->rs1);
> +    tcg_gen_addi_tl(t0, t0, a->imm);
> +
> +    tcg_gen_qemu_st_i64(cpu_fpr[a->rs2], t0, ctx->mem_idx, MO_TEUW);
> +
> +    mark_fs_dirty(ctx);
> +    tcg_temp_free(t0);
> +    return true;
> +}
> +
> +static bool trans_fmadd_h(DisasContext *ctx, arg_fmadd_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fmadd_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
> +                       cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
> +    mark_fs_dirty(ctx);
> +    return true;
> +}
> +
> +static bool trans_fmsub_h(DisasContext *ctx, arg_fmsub_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fmsub_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
> +                       cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
> +    mark_fs_dirty(ctx);
> +    return true;
> +}
> +
> +static bool trans_fnmsub_h(DisasContext *ctx, arg_fnmsub_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fnmsub_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
> +                        cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
> +    mark_fs_dirty(ctx);
> +    return true;
> +}
> +
> +static bool trans_fnmadd_h(DisasContext *ctx, arg_fnmadd_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fnmadd_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
> +                        cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
> +    mark_fs_dirty(ctx);
> +    return true;
> +}
> +
> +static bool trans_fadd_h(DisasContext *ctx, arg_fadd_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fadd_h(cpu_fpr[a->rd], cpu_env,
> +                      cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
> +    mark_fs_dirty(ctx);
> +    return true;
> +}
> +
> +static bool trans_fsub_h(DisasContext *ctx, arg_fsub_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fsub_h(cpu_fpr[a->rd], cpu_env,
> +                      cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
> +    mark_fs_dirty(ctx);
> +    return true;
> +}
> +
> +static bool trans_fmul_h(DisasContext *ctx, arg_fmul_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fmul_h(cpu_fpr[a->rd], cpu_env,
> +                      cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
> +    mark_fs_dirty(ctx);
> +    return true;
> +}
> +
> +static bool trans_fdiv_h(DisasContext *ctx, arg_fdiv_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fdiv_h(cpu_fpr[a->rd], cpu_env,
> +                      cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
> +    mark_fs_dirty(ctx);
> +    return true;
> +}
> +
> +static bool trans_fsqrt_h(DisasContext *ctx, arg_fsqrt_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fsqrt_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
> +    mark_fs_dirty(ctx);
> +    return true;
> +}
> +
> +static bool trans_fsgnj_h(DisasContext *ctx, arg_fsgnj_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    if (a->rs1 == a->rs2) { /* FMOV */
> +        gen_check_nanbox_h(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
> +    } else {
> +        TCGv_i64 rs1 = tcg_temp_new_i64();
> +        TCGv_i64 rs2 = tcg_temp_new_i64();
> +
> +        gen_check_nanbox_h(rs1, cpu_fpr[a->rs1]);
> +        gen_check_nanbox_h(rs2, cpu_fpr[a->rs2]);
> +
> +        /* This formulation retains the nanboxing of rs2. */
> +        tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 15);
> +        tcg_temp_free_i64(rs1);
> +        tcg_temp_free_i64(rs2);
> +    }
> +
> +    mark_fs_dirty(ctx);
> +    return true;
> +}
> +
> +static bool trans_fsgnjn_h(DisasContext *ctx, arg_fsgnjn_h *a)
> +{
> +    TCGv_i64 rs1, rs2, mask;
> +
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    rs1 = tcg_temp_new_i64();
> +    gen_check_nanbox_h(rs1, cpu_fpr[a->rs1]);
> +
> +    if (a->rs1 == a->rs2) { /* FNEG */
> +        tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(15, 1));
> +    } else {
> +        rs2 = tcg_temp_new_i64();
> +        gen_check_nanbox_h(rs2, cpu_fpr[a->rs2]);
> +
> +        /*
> +         * Replace bit 15 in rs1 with inverse in rs2.
> +         * This formulation retains the nanboxing of rs1.
> +         */
> +        mask = tcg_const_i64(~MAKE_64BIT_MASK(15, 1));
> +        tcg_gen_not_i64(rs2, rs2);
> +        tcg_gen_andc_i64(rs2, rs2, mask);
> +        tcg_gen_and_i64(rs1, mask, rs1);
> +        tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);
> +
> +        tcg_temp_free_i64(mask);
> +        tcg_temp_free_i64(rs2);
> +    }
> +    mark_fs_dirty(ctx);
> +    return true;
> +}
> +
> +static bool trans_fsgnjx_h(DisasContext *ctx, arg_fsgnjx_h *a)
> +{
> +    TCGv_i64 rs1, rs2;
> +
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    rs1 = tcg_temp_new_i64();
> +    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
> +
> +    if (a->rs1 == a->rs2) { /* FABS */
> +        tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(15, 1));
> +    } else {
> +        rs2 = tcg_temp_new_i64();
> +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
> +
> +        /*
> +         * Xor bit 15 in rs1 with that in rs2.
> +         * This formulation retains the nanboxing of rs1.
> +         */
> +        tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(15, 1));
> +        tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2);
> +
> +        tcg_temp_free_i64(rs2);
> +    }
> +
> +    mark_fs_dirty(ctx);
> +    return true;
> +}
> +
> +static bool trans_fmin_h(DisasContext *ctx, arg_fmin_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    gen_helper_fmin_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
> +                      cpu_fpr[a->rs2]);
> +    mark_fs_dirty(ctx);
> +    return true;
> +}
> +
> +static bool trans_fmax_h(DisasContext *ctx, arg_fmax_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    gen_helper_fmax_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
> +                      cpu_fpr[a->rs2]);
> +    mark_fs_dirty(ctx);
> +    return true;
> +}
> +
> +static bool trans_fcvt_s_h(DisasContext *ctx, arg_fcvt_s_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fcvt_s_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
> +
> +    mark_fs_dirty(ctx);
> +
> +    return true;
> +}
> +
> +static bool trans_fcvt_d_h(DisasContext *ctx, arg_fcvt_d_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +    REQUIRE_EXT(ctx, RVD);
> +
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fcvt_d_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
> +
> +    mark_fs_dirty(ctx);
> +
> +
> +    return true;
> +}
> +
> +static bool trans_fcvt_h_s(DisasContext *ctx, arg_fcvt_h_s *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fcvt_h_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
> +
> +    mark_fs_dirty(ctx);
> +
> +    return true;
> +}
> +
> +static bool trans_fcvt_h_d(DisasContext *ctx, arg_fcvt_h_d *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +    REQUIRE_EXT(ctx, RVD);
> +
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fcvt_h_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
> +
> +    mark_fs_dirty(ctx);
> +
> +    return true;
> +}
> +
> +static bool trans_feq_h(DisasContext *ctx, arg_feq_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +    TCGv t0 = tcg_temp_new();
> +    gen_helper_feq_h(t0, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
> +    gen_set_gpr(a->rd, t0);
> +    tcg_temp_free(t0);
> +    return true;
> +}
> +
> +static bool trans_flt_h(DisasContext *ctx, arg_flt_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +    TCGv t0 = tcg_temp_new();
> +    gen_helper_flt_h(t0, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
> +    gen_set_gpr(a->rd, t0);
> +    tcg_temp_free(t0);
> +    return true;
> +}
> +
> +static bool trans_fle_h(DisasContext *ctx, arg_fle_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +    TCGv t0 = tcg_temp_new();
> +    gen_helper_fle_h(t0, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
> +    gen_set_gpr(a->rd, t0);
> +    tcg_temp_free(t0);
> +    return true;
> +}
> +
> +static bool trans_fclass_h(DisasContext *ctx, arg_fclass_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    TCGv t0 = tcg_temp_new();
> +
> +    gen_helper_fclass_h(t0, cpu_fpr[a->rs1]);
> +
> +    gen_set_gpr(a->rd, t0);
> +    tcg_temp_free(t0);
> +
> +    return true;
> +}
> +
> +static bool trans_fcvt_w_h(DisasContext *ctx, arg_fcvt_w_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    TCGv t0 = tcg_temp_new();
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fcvt_w_h(t0, cpu_env, cpu_fpr[a->rs1]);
> +    gen_set_gpr(a->rd, t0);
> +    tcg_temp_free(t0);
> +
> +    return true;
> +}
> +
> +static bool trans_fcvt_wu_h(DisasContext *ctx, arg_fcvt_wu_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    TCGv t0 = tcg_temp_new();
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fcvt_wu_h(t0, cpu_env, cpu_fpr[a->rs1]);
> +    gen_set_gpr(a->rd, t0);
> +    tcg_temp_free(t0);
> +
> +    return true;
> +}
> +
> +static bool trans_fcvt_h_w(DisasContext *ctx, arg_fcvt_h_w *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    TCGv t0 = tcg_temp_new();
> +    gen_get_gpr(t0, a->rs1);
> +
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fcvt_h_w(cpu_fpr[a->rd], cpu_env, t0);
> +
> +    mark_fs_dirty(ctx);
> +    tcg_temp_free(t0);
> +
> +    return true;
> +}
> +
> +static bool trans_fcvt_h_wu(DisasContext *ctx, arg_fcvt_h_wu *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    TCGv t0 = tcg_temp_new();
> +    gen_get_gpr(t0, a->rs1);
> +
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fcvt_h_wu(cpu_fpr[a->rd], cpu_env, t0);
> +
> +    mark_fs_dirty(ctx);
> +    tcg_temp_free(t0);
> +
> +    return true;
> +}
> +
> +static bool trans_fmv_x_h(DisasContext *ctx, arg_fmv_x_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    TCGv t0 = tcg_temp_new();
> +
> +#if defined(TARGET_RISCV64)
> +    tcg_gen_ext16s_tl(t0, cpu_fpr[a->rs1]); // 16 bits->64 bits
> +#else
> +    tcg_gen_extrl_i64_i32(t0, cpu_fpr[a->rs1]); //16 bits->32 bits
> +    tcg_gen_ext16s_tl(t0, t0);
> +#endif
> +
> +    gen_set_gpr(a->rd, t0);
> +    tcg_temp_free(t0);
> +
> +    return true;
> +}
> +
> +static bool trans_fmv_h_x(DisasContext *ctx, arg_fmv_h_x *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    TCGv t0 = tcg_temp_new();
> +    gen_get_gpr(t0, a->rs1);
> +
> +    tcg_gen_extu_tl_i64(cpu_fpr[a->rd], t0);
> +    gen_nanbox_h(cpu_fpr[a->rd], cpu_fpr[a->rd]);
> +
> +    mark_fs_dirty(ctx);
> +    tcg_temp_free(t0);
> +
> +    return true;
> +}
> +
> +#ifdef TARGET_RISCV64
> +
> +static bool trans_fcvt_l_h(DisasContext *ctx, arg_fcvt_l_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    TCGv t0 = tcg_temp_new();
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fcvt_l_h(t0, cpu_env, cpu_fpr[a->rs1]);
> +    gen_set_gpr(a->rd, t0);
> +    tcg_temp_free(t0);
> +
> +    return true;
> +}
> +
> +static bool trans_fcvt_lu_h(DisasContext *ctx, arg_fcvt_lu_h *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    TCGv t0 = tcg_temp_new();
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fcvt_lu_h(t0, cpu_env, cpu_fpr[a->rs1]);
> +    gen_set_gpr(a->rd, t0);
> +    tcg_temp_free(t0);
> +
> +    return true;
> +}
> +
> +static bool trans_fcvt_h_l(DisasContext *ctx, arg_fcvt_h_l *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    TCGv t0 = tcg_temp_new();
> +    gen_get_gpr(t0, a->rs1);
> +
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fcvt_h_l(cpu_fpr[a->rd], cpu_env, t0);
> +
> +    mark_fs_dirty(ctx);
> +    tcg_temp_free(t0);
> +
> +    return true;
> +}
> +
> +static bool trans_fcvt_h_lu(DisasContext *ctx, arg_fcvt_h_lu *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_EXT(ctx, RVZfh);
> +
> +    TCGv t0 = tcg_temp_new();
> +    gen_get_gpr(t0, a->rs1);
> +
> +    gen_set_rm(ctx, a->rm);
> +    gen_helper_fcvt_h_lu(cpu_fpr[a->rd], cpu_env, t0);
> +
> +    mark_fs_dirty(ctx);
> +    tcg_temp_free(t0);
> +
> +    return true;
> +}
> +#endif
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index e227534..6dbd8c9 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -769,6 +769,7 @@ static bool gen_shift(DisasContext *ctx, arg_r *a,
>  #include "insn_trans/trans_rvd.inc.c"
>  #include "insn_trans/trans_rvh.inc.c"
>  #include "insn_trans/trans_rvv.inc.c"
> +#include "insn_trans/trans_rvzfh.inc.c"
>  #include "insn_trans/trans_privileged.inc.c"
>
>  /* Include the auto-generated decoder for 16 bit insn */
> --
> 2.7.4
>
>


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-08-25 19:15 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-11  8:59 [PATCH 0/3] RFC: target/riscv: add half-precision floating-point extension Chih-Min Chao
2020-08-11  8:59 ` [PATCH 1/3] target/riscv: add NaN-Boxing helper for half-float Chih-Min Chao
2020-08-11  8:59 ` [PATCH 2/3] target/riscv: Implement zfh extension Chih-Min Chao
2020-08-25 19:00   ` Alistair Francis
2020-08-11  8:59 ` [PATCH 3/3] target/riscv: support 'x-k' in cpu option Chih-Min Chao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).