* [PATCH] target/loongarch: Remove cpu_fcsr0
@ 2022-08-06 17:08 Richard Henderson
2022-08-07 3:36 ` huqi
2022-08-08 1:23 ` gaosong
0 siblings, 2 replies; 3+ messages in thread
From: Richard Henderson @ 2022-08-06 17:08 UTC (permalink / raw)
To: qemu-devel; +Cc: Qi Hu, Song Gao, Feiyang Chen
All of the fpu operations are defined with TCG_CALL_NO_WG, but they
all modify FCSR0. The most efficient way to fix this is to remove
cpu_fcsr0, and instead use explicit load and store operations for the
two instructions that manipulate that value.
Cc: Qi Hu <huqi@loongson.cn>
Cc: Song Gao <gaosong@loongson.cn>
Reported-by: Feiyang Chen <chenfeiyang@loongson.cn>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/helper.h | 2 +-
target/loongarch/fpu_helper.c | 4 +--
target/loongarch/translate.c | 3 --
tests/tcg/loongarch64/test_fcsr.c | 15 +++++++++
target/loongarch/insn_trans/trans_fmov.c.inc | 33 ++++++++++----------
tests/tcg/loongarch64/Makefile.target | 1 +
6 files changed, 36 insertions(+), 22 deletions(-)
create mode 100644 tests/tcg/loongarch64/test_fcsr.c
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index cbbe008f32..9c01823a26 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -91,7 +91,7 @@ DEF_HELPER_2(ftint_w_d, i64, env, i64)
DEF_HELPER_2(frint_s, i64, env, i64)
DEF_HELPER_2(frint_d, i64, env, i64)
-DEF_HELPER_FLAGS_2(set_rounding_mode, TCG_CALL_NO_RWG, void, env, i32)
+DEF_HELPER_FLAGS_1(set_rounding_mode, TCG_CALL_NO_RWG, void, env)
DEF_HELPER_1(rdtime_d, i64, env)
diff --git a/target/loongarch/fpu_helper.c b/target/loongarch/fpu_helper.c
index bd76529219..4b9637210a 100644
--- a/target/loongarch/fpu_helper.c
+++ b/target/loongarch/fpu_helper.c
@@ -872,8 +872,8 @@ uint64_t helper_ftint_w_d(CPULoongArchState *env, uint64_t fj)
return fd;
}
-void helper_set_rounding_mode(CPULoongArchState *env, uint32_t fcsr0)
+void helper_set_rounding_mode(CPULoongArchState *env)
{
- set_float_rounding_mode(ieee_rm[(fcsr0 >> FCSR0_RM) & 0x3],
+ set_float_rounding_mode(ieee_rm[(env->fcsr0 >> FCSR0_RM) & 0x3],
&env->fp_status);
}
diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
index c9afd11420..51ba291430 100644
--- a/target/loongarch/translate.c
+++ b/target/loongarch/translate.c
@@ -22,7 +22,6 @@
/* Global register indices */
TCGv cpu_gpr[32], cpu_pc;
static TCGv cpu_lladdr, cpu_llval;
-TCGv_i32 cpu_fcsr0;
TCGv_i64 cpu_fpr[32];
#include "exec/gen-icount.h"
@@ -266,8 +265,6 @@ void loongarch_translate_init(void)
}
cpu_pc = tcg_global_mem_new(cpu_env, offsetof(CPULoongArchState, pc), "pc");
- cpu_fcsr0 = tcg_global_mem_new_i32(cpu_env,
- offsetof(CPULoongArchState, fcsr0), "fcsr0");
cpu_lladdr = tcg_global_mem_new(cpu_env,
offsetof(CPULoongArchState, lladdr), "lladdr");
cpu_llval = tcg_global_mem_new(cpu_env,
diff --git a/tests/tcg/loongarch64/test_fcsr.c b/tests/tcg/loongarch64/test_fcsr.c
new file mode 100644
index 0000000000..ad3609eb99
--- /dev/null
+++ b/tests/tcg/loongarch64/test_fcsr.c
@@ -0,0 +1,15 @@
+#include <assert.h>
+
+int main()
+{
+ unsigned fcsr;
+
+ asm("movgr2fcsr $r0,$r0\n\t"
+ "movgr2fr.d $f0,$r0\n\t"
+ "fdiv.d $f0,$f0,$f0\n\t"
+ "movfcsr2gr %0,$r0"
+ : "=r"(fcsr) : : "f0");
+
+ assert(fcsr & (16 << 16)); /* Invalid */
+ return 0;
+}
diff --git a/target/loongarch/insn_trans/trans_fmov.c.inc b/target/loongarch/insn_trans/trans_fmov.c.inc
index 24753d4568..5537e3dd35 100644
--- a/target/loongarch/insn_trans/trans_fmov.c.inc
+++ b/target/loongarch/insn_trans/trans_fmov.c.inc
@@ -60,38 +60,39 @@ static bool trans_movgr2fcsr(DisasContext *ctx, arg_movgr2fcsr *a)
TCGv Rj = gpr_src(ctx, a->rj, EXT_NONE);
if (mask == UINT32_MAX) {
- tcg_gen_extrl_i64_i32(cpu_fcsr0, Rj);
+ tcg_gen_st32_i64(Rj, cpu_env, offsetof(CPULoongArchState, fcsr0));
} else {
+ TCGv_i32 fcsr0 = tcg_temp_new_i32();
TCGv_i32 temp = tcg_temp_new_i32();
+ tcg_gen_ld_i32(fcsr0, cpu_env, offsetof(CPULoongArchState, fcsr0));
tcg_gen_extrl_i64_i32(temp, Rj);
tcg_gen_andi_i32(temp, temp, mask);
- tcg_gen_andi_i32(cpu_fcsr0, cpu_fcsr0, ~mask);
- tcg_gen_or_i32(cpu_fcsr0, cpu_fcsr0, temp);
+ tcg_gen_andi_i32(fcsr0, fcsr0, ~mask);
+ tcg_gen_or_i32(fcsr0, fcsr0, temp);
+ tcg_gen_st_i32(fcsr0, cpu_env, offsetof(CPULoongArchState, fcsr0));
+
tcg_temp_free_i32(temp);
-
- /*
- * Install the new rounding mode to fpu_status, if changed.
- * Note that FCSR3 is exactly the rounding mode field.
- */
- if (mask != FCSR0_M3) {
- return true;
- }
+ tcg_temp_free_i32(fcsr0);
}
- gen_helper_set_rounding_mode(cpu_env, cpu_fcsr0);
+ /*
+ * Install the new rounding mode to fpu_status, if changed.
+ * Note that FCSR3 is exactly the rounding mode field.
+ */
+ if (mask & FCSR0_M3) {
+ gen_helper_set_rounding_mode(cpu_env);
+ }
return true;
}
static bool trans_movfcsr2gr(DisasContext *ctx, arg_movfcsr2gr *a)
{
- TCGv_i32 temp = tcg_temp_new_i32();
TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
- tcg_gen_andi_i32(temp, cpu_fcsr0, fcsr_mask[a->fcsrs]);
- tcg_gen_ext_i32_i64(dest, temp);
+ tcg_gen_ld32u_i64(dest, cpu_env, offsetof(CPULoongArchState, fcsr0));
+ tcg_gen_andi_i64(dest, dest, fcsr_mask[a->fcsrs]);
gen_set_gpr(a->rd, dest, EXT_NONE);
- tcg_temp_free_i32(temp);
return true;
}
diff --git a/tests/tcg/loongarch64/Makefile.target b/tests/tcg/loongarch64/Makefile.target
index 0115de78ef..00030a1026 100644
--- a/tests/tcg/loongarch64/Makefile.target
+++ b/tests/tcg/loongarch64/Makefile.target
@@ -15,5 +15,6 @@ LOONGARCH64_TESTS += test_div
LOONGARCH64_TESTS += test_fclass
LOONGARCH64_TESTS += test_fpcom
LOONGARCH64_TESTS += test_pcadd
+LOONGARCH64_TESTS += test_fcsr
TESTS += $(LOONGARCH64_TESTS)
--
2.34.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] target/loongarch: Remove cpu_fcsr0
2022-08-06 17:08 [PATCH] target/loongarch: Remove cpu_fcsr0 Richard Henderson
@ 2022-08-07 3:36 ` huqi
2022-08-08 1:23 ` gaosong
1 sibling, 0 replies; 3+ messages in thread
From: huqi @ 2022-08-07 3:36 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel, Song Gao, Feiyang Chen
> On Aug 7, 2022, at 01:09, Richard Henderson <richard.henderson@linaro.org> wrote:
> All of the fpu operations are defined with TCG_CALL_NO_WG, but they
> all modify FCSR0. The most efficient way to fix this is to remove
> cpu_fcsr0, and instead use explicit load and store operations for the
> two instructions that manipulate that value.
>
> Cc: Qi Hu <huqi@loongson.cn>
> Cc: Song Gao <gaosong@loongson.cn>
> Reported-by: Feiyang Chen <chenfeiyang@loongson.cn>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/loongarch/helper.h | 2 +-
> target/loongarch/fpu_helper.c | 4 +--
> target/loongarch/translate.c | 3 --
> tests/tcg/loongarch64/test_fcsr.c | 15 +++++++++
> target/loongarch/insn_trans/trans_fmov.c.inc | 33 ++++++++++----------
> tests/tcg/loongarch64/Makefile.target | 1 +
> 6 files changed, 36 insertions(+), 22 deletions(-)
> create mode 100644 tests/tcg/loongarch64/test_fcsr.c
>
> diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
> index cbbe008f32..9c01823a26 100644
> --- a/target/loongarch/helper.h
> +++ b/target/loongarch/helper.h
> @@ -91,7 +91,7 @@ DEF_HELPER_2(ftint_w_d, i64, env, i64)
> DEF_HELPER_2(frint_s, i64, env, i64)
> DEF_HELPER_2(frint_d, i64, env, i64)
>
> -DEF_HELPER_FLAGS_2(set_rounding_mode, TCG_CALL_NO_RWG, void, env, i32)
> +DEF_HELPER_FLAGS_1(set_rounding_mode, TCG_CALL_NO_RWG, void, env)
>
> DEF_HELPER_1(rdtime_d, i64, env)
>
> diff --git a/target/loongarch/fpu_helper.c b/target/loongarch/fpu_helper.c
> index bd76529219..4b9637210a 100644
> --- a/target/loongarch/fpu_helper.c
> +++ b/target/loongarch/fpu_helper.c
> @@ -872,8 +872,8 @@ uint64_t helper_ftint_w_d(CPULoongArchState *env, uint64_t fj)
> return fd;
> }
>
> -void helper_set_rounding_mode(CPULoongArchState *env, uint32_t fcsr0)
> +void helper_set_rounding_mode(CPULoongArchState *env)
> {
> - set_float_rounding_mode(ieee_rm[(fcsr0 >> FCSR0_RM) & 0x3],
> + set_float_rounding_mode(ieee_rm[(env->fcsr0 >> FCSR0_RM) & 0x3],
> &env->fp_status);
> }
> diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
> index c9afd11420..51ba291430 100644
> --- a/target/loongarch/translate.c
> +++ b/target/loongarch/translate.c
> @@ -22,7 +22,6 @@
> /* Global register indices */
> TCGv cpu_gpr[32], cpu_pc;
> static TCGv cpu_lladdr, cpu_llval;
> -TCGv_i32 cpu_fcsr0;
> TCGv_i64 cpu_fpr[32];
>
> #include "exec/gen-icount.h"
> @@ -266,8 +265,6 @@ void loongarch_translate_init(void)
> }
>
> cpu_pc = tcg_global_mem_new(cpu_env, offsetof(CPULoongArchState, pc), "pc");
> - cpu_fcsr0 = tcg_global_mem_new_i32(cpu_env,
> - offsetof(CPULoongArchState, fcsr0), "fcsr0");
> cpu_lladdr = tcg_global_mem_new(cpu_env,
> offsetof(CPULoongArchState, lladdr), "lladdr");
> cpu_llval = tcg_global_mem_new(cpu_env,
> diff --git a/tests/tcg/loongarch64/test_fcsr.c b/tests/tcg/loongarch64/test_fcsr.c
> new file mode 100644
> index 0000000000..ad3609eb99
> --- /dev/null
> +++ b/tests/tcg/loongarch64/test_fcsr.c
> @@ -0,0 +1,15 @@
> +#include <assert.h>
> +
> +int main()
> +{
> + unsigned fcsr;
> +
> + asm("movgr2fcsr $r0,$r0\n\t"
> + "movgr2fr.d $f0,$r0\n\t"
> + "fdiv.d $f0,$f0,$f0\n\t"
> + "movfcsr2gr %0,$r0"
> + : "=r"(fcsr) : : "f0");
> +
> + assert(fcsr & (16 << 16)); /* Invalid */
> + return 0;
> +}
> diff --git a/target/loongarch/insn_trans/trans_fmov.c.inc b/target/loongarch/insn_trans/trans_fmov.c.inc
> index 24753d4568..5537e3dd35 100644
> --- a/target/loongarch/insn_trans/trans_fmov.c.inc
> +++ b/target/loongarch/insn_trans/trans_fmov.c.inc
> @@ -60,38 +60,39 @@ static bool trans_movgr2fcsr(DisasContext *ctx, arg_movgr2fcsr *a)
> TCGv Rj = gpr_src(ctx, a->rj, EXT_NONE);
>
> if (mask == UINT32_MAX) {
> - tcg_gen_extrl_i64_i32(cpu_fcsr0, Rj);
> + tcg_gen_st32_i64(Rj, cpu_env, offsetof(CPULoongArchState, fcsr0));
> } else {
> + TCGv_i32 fcsr0 = tcg_temp_new_i32();
> TCGv_i32 temp = tcg_temp_new_i32();
>
> + tcg_gen_ld_i32(fcsr0, cpu_env, offsetof(CPULoongArchState, fcsr0));
> tcg_gen_extrl_i64_i32(temp, Rj);
> tcg_gen_andi_i32(temp, temp, mask);
> - tcg_gen_andi_i32(cpu_fcsr0, cpu_fcsr0, ~mask);
> - tcg_gen_or_i32(cpu_fcsr0, cpu_fcsr0, temp);
> + tcg_gen_andi_i32(fcsr0, fcsr0, ~mask);
> + tcg_gen_or_i32(fcsr0, fcsr0, temp);
> + tcg_gen_st_i32(fcsr0, cpu_env, offsetof(CPULoongArchState, fcsr0));
> +
> tcg_temp_free_i32(temp);
> -
> - /*
> - * Install the new rounding mode to fpu_status, if changed.
> - * Note that FCSR3 is exactly the rounding mode field.
> - */
> - if (mask != FCSR0_M3) {
> - return true;
> - }
> + tcg_temp_free_i32(fcsr0);
> }
> - gen_helper_set_rounding_mode(cpu_env, cpu_fcsr0);
>
> + /*
> + * Install the new rounding mode to fpu_status, if changed.
> + * Note that FCSR3 is exactly the rounding mode field.
> + */
> + if (mask & FCSR0_M3) {
> + gen_helper_set_rounding_mode(cpu_env);
> + }
> return true;
> }
>
> static bool trans_movfcsr2gr(DisasContext *ctx, arg_movfcsr2gr *a)
> {
> - TCGv_i32 temp = tcg_temp_new_i32();
> TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
>
> - tcg_gen_andi_i32(temp, cpu_fcsr0, fcsr_mask[a->fcsrs]);
> - tcg_gen_ext_i32_i64(dest, temp);
> + tcg_gen_ld32u_i64(dest, cpu_env, offsetof(CPULoongArchState, fcsr0));
> + tcg_gen_andi_i64(dest, dest, fcsr_mask[a->fcsrs]);
> gen_set_gpr(a->rd, dest, EXT_NONE);
> - tcg_temp_free_i32(temp);
>
> return true;
> }
> diff --git a/tests/tcg/loongarch64/Makefile.target b/tests/tcg/loongarch64/Makefile.target
> index 0115de78ef..00030a1026 100644
> --- a/tests/tcg/loongarch64/Makefile.target
> +++ b/tests/tcg/loongarch64/Makefile.target
> @@ -15,5 +15,6 @@ LOONGARCH64_TESTS += test_div
> LOONGARCH64_TESTS += test_fclass
> LOONGARCH64_TESTS += test_fpcom
> LOONGARCH64_TESTS += test_pcadd
> +LOONGARCH64_TESTS += test_fcsr
>
> TESTS += $(LOONGARCH64_TESTS)
> --
> 2.34.1
Acked-by: Qi Hu <huqi@loongson.cn>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] target/loongarch: Remove cpu_fcsr0
2022-08-06 17:08 [PATCH] target/loongarch: Remove cpu_fcsr0 Richard Henderson
2022-08-07 3:36 ` huqi
@ 2022-08-08 1:23 ` gaosong
1 sibling, 0 replies; 3+ messages in thread
From: gaosong @ 2022-08-08 1:23 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: Qi Hu, Feiyang Chen
On 2022/8/7 1:08 AM, Richard Henderson wrote:
> All of the fpu operations are defined with TCG_CALL_NO_WG, but they
> all modify FCSR0. The most efficient way to fix this is to remove
> cpu_fcsr0, and instead use explicit load and store operations for the
> two instructions that manipulate that value.
>
> Cc: Qi Hu <huqi@loongson.cn>
> Cc: Song Gao <gaosong@loongson.cn>
> Reported-by: Feiyang Chen <chenfeiyang@loongson.cn>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/loongarch/helper.h | 2 +-
> target/loongarch/fpu_helper.c | 4 +--
> target/loongarch/translate.c | 3 --
> tests/tcg/loongarch64/test_fcsr.c | 15 +++++++++
> target/loongarch/insn_trans/trans_fmov.c.inc | 33 ++++++++++----------
> tests/tcg/loongarch64/Makefile.target | 1 +
> 6 files changed, 36 insertions(+), 22 deletions(-)
> create mode 100644 tests/tcg/loongarch64/test_fcsr.c
Reviewed-by: Song Gao <gaosong@loongson.cn>
Thanks.
Song Gao
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2022-08-08 1:24 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-06 17:08 [PATCH] target/loongarch: Remove cpu_fcsr0 Richard Henderson
2022-08-07 3:36 ` huqi
2022-08-08 1:23 ` gaosong
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.