All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 0/2] target/arm: vector tail cleanups
@ 2020-05-19 21:24 Richard Henderson
  2020-05-19 21:24 ` [PATCH v3 1/2] target/arm: Use tcg_gen_gvec_mov for clear_vec_high Richard Henderson
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Richard Henderson @ 2020-05-19 21:24 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell

Version 3 fixes the reported bug in EXT.

I should make sure I have fixed the bug wherein RISU prints a
mismatch and still exits with success, which hid this problem
in the scrollback.


r~


Richard Henderson (2):
  target/arm: Use tcg_gen_gvec_mov for clear_vec_high
  target/arm: Use clear_vec_high more effectively

 target/arm/translate-a64.c | 63 ++++++++++++++++++++------------------
 1 file changed, 34 insertions(+), 29 deletions(-)

-- 
2.20.1



^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v3 1/2] target/arm: Use tcg_gen_gvec_mov for clear_vec_high
  2020-05-19 21:24 [PATCH v3 0/2] target/arm: vector tail cleanups Richard Henderson
@ 2020-05-19 21:24 ` Richard Henderson
  2020-05-19 21:24 ` [PATCH v3 2/2] target/arm: Use clear_vec_high more effectively Richard Henderson
  2020-05-21 16:15 ` [PATCH v3 0/2] target/arm: vector tail cleanups Peter Maydell
  2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2020-05-19 21:24 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, Alex Bennée

The 8-byte store for the end a !is_q operation can be
merged with the other stores.  Use a no-op vector move
to trigger the expand_clr portion of tcg_gen_gvec_mov.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-a64.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 991e451644..4f6edb2892 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -496,14 +496,8 @@ static void clear_vec_high(DisasContext *s, bool is_q, int rd)
     unsigned ofs = fp_reg_offset(s, rd, MO_64);
     unsigned vsz = vec_full_reg_size(s);
 
-    if (!is_q) {
-        TCGv_i64 tcg_zero = tcg_const_i64(0);
-        tcg_gen_st_i64(tcg_zero, cpu_env, ofs + 8);
-        tcg_temp_free_i64(tcg_zero);
-    }
-    if (vsz > 16) {
-        tcg_gen_gvec_dup_imm(MO_64, ofs + 16, vsz - 16, vsz - 16, 0);
-    }
+    /* Nop move, with side effect of clearing the tail. */
+    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
 }
 
 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v3 2/2] target/arm: Use clear_vec_high more effectively
  2020-05-19 21:24 [PATCH v3 0/2] target/arm: vector tail cleanups Richard Henderson
  2020-05-19 21:24 ` [PATCH v3 1/2] target/arm: Use tcg_gen_gvec_mov for clear_vec_high Richard Henderson
@ 2020-05-19 21:24 ` Richard Henderson
  2020-05-21 16:15 ` [PATCH v3 0/2] target/arm: vector tail cleanups Peter Maydell
  2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2020-05-19 21:24 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, Alex Bennée

Do not explicitly store zero to the NEON high part
when we can pass !is_q to clear_vec_high.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-a64.c | 53 +++++++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 21 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 4f6edb2892..874f3eb4f9 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -900,11 +900,10 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
 {
     /* This always zero-extends and writes to a full 128 bit wide vector */
     TCGv_i64 tmplo = tcg_temp_new_i64();
-    TCGv_i64 tmphi;
+    TCGv_i64 tmphi = NULL;
 
     if (size < 4) {
         MemOp memop = s->be_data + size;
-        tmphi = tcg_const_i64(0);
         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
     } else {
         bool be = s->be_data == MO_BE;
@@ -922,12 +921,13 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
     }
 
     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
-    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
-
     tcg_temp_free_i64(tmplo);
-    tcg_temp_free_i64(tmphi);
 
-    clear_vec_high(s, true, destidx);
+    if (tmphi) {
+        tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
+        tcg_temp_free_i64(tmphi);
+    }
+    clear_vec_high(s, tmphi != NULL, destidx);
 }
 
 /*
@@ -6934,7 +6934,6 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
             read_vec_element(s, tcg_resh, rm, 0, MO_64);
             do_ext64(s, tcg_resh, tcg_resl, pos);
         }
-        tcg_gen_movi_i64(tcg_resh, 0);
     } else {
         TCGv_i64 tcg_hh;
         typedef struct {
@@ -6964,9 +6963,11 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
 
     write_vec_element(s, tcg_resl, rd, 0, MO_64);
     tcg_temp_free_i64(tcg_resl);
-    write_vec_element(s, tcg_resh, rd, 1, MO_64);
+    if (is_q) {
+        write_vec_element(s, tcg_resh, rd, 1, MO_64);
+    }
     tcg_temp_free_i64(tcg_resh);
-    clear_vec_high(s, true, rd);
+    clear_vec_high(s, is_q, rd);
 }
 
 /* TBL/TBX
@@ -7003,17 +7004,21 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn)
      * the input.
      */
     tcg_resl = tcg_temp_new_i64();
-    tcg_resh = tcg_temp_new_i64();
+    tcg_resh = NULL;
 
     if (is_tblx) {
         read_vec_element(s, tcg_resl, rd, 0, MO_64);
     } else {
         tcg_gen_movi_i64(tcg_resl, 0);
     }
-    if (is_tblx && is_q) {
-        read_vec_element(s, tcg_resh, rd, 1, MO_64);
-    } else {
-        tcg_gen_movi_i64(tcg_resh, 0);
+
+    if (is_q) {
+        tcg_resh = tcg_temp_new_i64();
+        if (is_tblx) {
+            read_vec_element(s, tcg_resh, rd, 1, MO_64);
+        } else {
+            tcg_gen_movi_i64(tcg_resh, 0);
+        }
     }
 
     tcg_idx = tcg_temp_new_i64();
@@ -7033,9 +7038,12 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn)
 
     write_vec_element(s, tcg_resl, rd, 0, MO_64);
     tcg_temp_free_i64(tcg_resl);
-    write_vec_element(s, tcg_resh, rd, 1, MO_64);
-    tcg_temp_free_i64(tcg_resh);
-    clear_vec_high(s, true, rd);
+
+    if (is_q) {
+        write_vec_element(s, tcg_resh, rd, 1, MO_64);
+        tcg_temp_free_i64(tcg_resh);
+    }
+    clear_vec_high(s, is_q, rd);
 }
 
 /* ZIP/UZP/TRN
@@ -7072,7 +7080,7 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
     }
 
     tcg_resl = tcg_const_i64(0);
-    tcg_resh = tcg_const_i64(0);
+    tcg_resh = is_q ? tcg_const_i64(0) : NULL;
     tcg_res = tcg_temp_new_i64();
 
     for (i = 0; i < elements; i++) {
@@ -7123,9 +7131,12 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
 
     write_vec_element(s, tcg_resl, rd, 0, MO_64);
     tcg_temp_free_i64(tcg_resl);
-    write_vec_element(s, tcg_resh, rd, 1, MO_64);
-    tcg_temp_free_i64(tcg_resh);
-    clear_vec_high(s, true, rd);
+
+    if (is_q) {
+        write_vec_element(s, tcg_resh, rd, 1, MO_64);
+        tcg_temp_free_i64(tcg_resh);
+    }
+    clear_vec_high(s, is_q, rd);
 }
 
 /*
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v3 0/2] target/arm: vector tail cleanups
  2020-05-19 21:24 [PATCH v3 0/2] target/arm: vector tail cleanups Richard Henderson
  2020-05-19 21:24 ` [PATCH v3 1/2] target/arm: Use tcg_gen_gvec_mov for clear_vec_high Richard Henderson
  2020-05-19 21:24 ` [PATCH v3 2/2] target/arm: Use clear_vec_high more effectively Richard Henderson
@ 2020-05-21 16:15 ` Peter Maydell
  2 siblings, 0 replies; 4+ messages in thread
From: Peter Maydell @ 2020-05-21 16:15 UTC (permalink / raw)
  To: Richard Henderson; +Cc: QEMU Developers

On Tue, 19 May 2020 at 22:24, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Version 3 fixes the reported bug in EXT.
>
> I should make sure I have fixed the bug wherein RISU prints a
> mismatch and still exits with success, which hid this problem
> in the scrollback.


Applied to target-arm.next, thanks.

-- PMM


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-05-21 16:16 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-19 21:24 [PATCH v3 0/2] target/arm: vector tail cleanups Richard Henderson
2020-05-19 21:24 ` [PATCH v3 1/2] target/arm: Use tcg_gen_gvec_mov for clear_vec_high Richard Henderson
2020-05-19 21:24 ` [PATCH v3 2/2] target/arm: Use clear_vec_high more effectively Richard Henderson
2020-05-21 16:15 ` [PATCH v3 0/2] target/arm: vector tail cleanups Peter Maydell

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.