All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] tcg/tci: Two regression fixes
@ 2023-06-07  5:46 Richard Henderson
  2023-06-07  5:46 ` [PATCH 1/2] tcg/tci: Adjust passing of MemOpIdx Richard Henderson
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Richard Henderson @ 2023-06-07  5:46 UTC (permalink / raw)
  To: qemu-devel; +Cc: sw

Two recent regressions, both related to recent tcg changes.

Our CI does not test TCI with --enable-debug-tcg, which given timeout
constraints is probably correct, but in this case resulted in an
infinite loop on aarch64 multiarch/memory.c with FEAT_LSE2 enabled.

r~

Richard Henderson (2):
  tcg/tci: Adjust passing of MemOpIdx
  tcg/tci: Adjust call-clobbered regs for int128_t

 tcg/tci.c                | 30 +++++++++++++-----------------
 tcg/tci/tcg-target.c.inc | 30 +++++++++---------------------
 2 files changed, 22 insertions(+), 38 deletions(-)

-- 
2.34.1



^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/2] tcg/tci: Adjust passing of MemOpIdx
  2023-06-07  5:46 [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson
@ 2023-06-07  5:46 ` Richard Henderson
  2023-06-07  5:46 ` [PATCH 2/2] tcg/tci: Adjust call-clobbered regs for int128_t Richard Henderson
  2023-06-07  5:53 ` [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson
  2 siblings, 0 replies; 5+ messages in thread
From: Richard Henderson @ 2023-06-07  5:46 UTC (permalink / raw)
  To: qemu-devel; +Cc: sw

Since adding MO_ATOM_MASK, the maximum MemOpIdx requires 15 bits,
which overflows the 12 bit field allocated for TCI memory ops.
Expand the field to 16 bits for 2-operand memory ops, and place
the value in TCG_REG_TMP for 3-operand memory ops (same as we
already do for 4-operand memory ops).

Cures a debug assert for aarch64, with FEAT_LSE enabled.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tci.c                | 30 +++++++++++++-----------------
 tcg/tci/tcg-target.c.inc | 21 ++++-----------------
 2 files changed, 17 insertions(+), 34 deletions(-)

diff --git a/tcg/tci.c b/tcg/tci.c
index 813572ff39..4640902c88 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -106,7 +106,7 @@ static void tci_args_rrm(uint32_t insn, TCGReg *r0,
 {
     *r0 = extract32(insn, 8, 4);
     *r1 = extract32(insn, 12, 4);
-    *m2 = extract32(insn, 20, 12);
+    *m2 = extract32(insn, 16, 16);
 }
 
 static void tci_args_rrr(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2)
@@ -141,15 +141,6 @@ static void tci_args_rrrc(uint32_t insn,
     *c3 = extract32(insn, 20, 4);
 }
 
-static void tci_args_rrrm(uint32_t insn,
-                          TCGReg *r0, TCGReg *r1, TCGReg *r2, MemOpIdx *m3)
-{
-    *r0 = extract32(insn, 8, 4);
-    *r1 = extract32(insn, 12, 4);
-    *r2 = extract32(insn, 16, 4);
-    *m3 = extract32(insn, 20, 12);
-}
-
 static void tci_args_rrrbb(uint32_t insn, TCGReg *r0, TCGReg *r1,
                            TCGReg *r2, uint8_t *i3, uint8_t *i4)
 {
@@ -929,8 +920,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
                 tci_args_rrm(insn, &r0, &r1, &oi);
                 taddr = regs[r1];
             } else {
-                tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
+                tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
                 taddr = tci_uint64(regs[r2], regs[r1]);
+                oi = regs[r3];
             }
         do_ld_i32:
             regs[r0] = tci_qemu_ld(env, taddr, oi, tb_ptr);
@@ -941,8 +933,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
                 tci_args_rrm(insn, &r0, &r1, &oi);
                 taddr = (uint32_t)regs[r1];
             } else {
-                tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
+                tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
                 taddr = (uint32_t)regs[r2];
+                oi = regs[r3];
             }
             goto do_ld_i64;
         case INDEX_op_qemu_ld_a64_i64:
@@ -972,8 +965,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
                 tci_args_rrm(insn, &r0, &r1, &oi);
                 taddr = regs[r1];
             } else {
-                tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
+                tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
                 taddr = tci_uint64(regs[r2], regs[r1]);
+                oi = regs[r3];
             }
         do_st_i32:
             tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr);
@@ -985,9 +979,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
                 tmp64 = regs[r0];
                 taddr = (uint32_t)regs[r1];
             } else {
-                tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
+                tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
                 tmp64 = tci_uint64(regs[r1], regs[r0]);
                 taddr = (uint32_t)regs[r2];
+                oi = regs[r3];
             }
             goto do_st_i64;
         case INDEX_op_qemu_st_a64_i64:
@@ -1293,9 +1288,10 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
                                op_name, str_r(r0), str_r(r1), oi);
             break;
         case 3:
-            tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
-            info->fprintf_func(info->stream, "%-12s  %s, %s, %s, %x",
-                               op_name, str_r(r0), str_r(r1), str_r(r2), oi);
+            tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
+            info->fprintf_func(info->stream, "%-12s  %s, %s, %s, %s",
+                               op_name, str_r(r0), str_r(r1),
+                               str_r(r2), str_r(r3));
             break;
         case 4:
             tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index c9516a5e8b..5b456e1277 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -331,11 +331,11 @@ static void tcg_out_op_rrm(TCGContext *s, TCGOpcode op,
 {
     tcg_insn_unit insn = 0;
 
-    tcg_debug_assert(m2 == extract32(m2, 0, 12));
+    tcg_debug_assert(m2 == extract32(m2, 0, 16));
     insn = deposit32(insn, 0, 8, op);
     insn = deposit32(insn, 8, 4, r0);
     insn = deposit32(insn, 12, 4, r1);
-    insn = deposit32(insn, 20, 12, m2);
+    insn = deposit32(insn, 16, 16, m2);
     tcg_out32(s, insn);
 }
 
@@ -392,20 +392,6 @@ static void tcg_out_op_rrrc(TCGContext *s, TCGOpcode op,
     tcg_out32(s, insn);
 }
 
-static void tcg_out_op_rrrm(TCGContext *s, TCGOpcode op,
-                            TCGReg r0, TCGReg r1, TCGReg r2, TCGArg m3)
-{
-    tcg_insn_unit insn = 0;
-
-    tcg_debug_assert(m3 == extract32(m3, 0, 12));
-    insn = deposit32(insn, 0, 8, op);
-    insn = deposit32(insn, 8, 4, r0);
-    insn = deposit32(insn, 12, 4, r1);
-    insn = deposit32(insn, 16, 4, r2);
-    insn = deposit32(insn, 20, 12, m3);
-    tcg_out32(s, insn);
-}
-
 static void tcg_out_op_rrrbb(TCGContext *s, TCGOpcode op, TCGReg r0,
                              TCGReg r1, TCGReg r2, uint8_t b3, uint8_t b4)
 {
@@ -860,7 +846,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         if (TCG_TARGET_REG_BITS == 64) {
             tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
         } else {
-            tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]);
+            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[4]);
+            tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], TCG_REG_TMP);
         }
         break;
     case INDEX_op_qemu_ld_a64_i64:
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/2] tcg/tci: Adjust call-clobbered regs for int128_t
  2023-06-07  5:46 [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson
  2023-06-07  5:46 ` [PATCH 1/2] tcg/tci: Adjust passing of MemOpIdx Richard Henderson
@ 2023-06-07  5:46 ` Richard Henderson
  2023-06-07  7:44   ` Philippe Mathieu-Daudé
  2023-06-07  5:53 ` [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson
  2 siblings, 1 reply; 5+ messages in thread
From: Richard Henderson @ 2023-06-07  5:46 UTC (permalink / raw)
  To: qemu-devel; +Cc: sw

We require either 2 or 4 registers to hold int128_t.
Failure to do so results in a register allocation assert.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tci/tcg-target.c.inc | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index 5b456e1277..0037f904f1 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -179,8 +179,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 }
 
 static const int tcg_target_reg_alloc_order[] = {
-    TCG_REG_R2,
-    TCG_REG_R3,
     TCG_REG_R4,
     TCG_REG_R5,
     TCG_REG_R6,
@@ -193,6 +191,9 @@ static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_R13,
     TCG_REG_R14,
     TCG_REG_R15,
+    /* Either 2 or 4 of these are call clobbered, so use them last. */
+    TCG_REG_R3,
+    TCG_REG_R2,
     TCG_REG_R1,
     TCG_REG_R0,
 };
@@ -934,11 +935,11 @@ static void tcg_target_init(TCGContext *s)
     /*
      * The interpreter "registers" are in the local stack frame and
      * cannot be clobbered by the called helper functions.  However,
-     * the interpreter assumes a 64-bit return value and assigns to
+     * the interpreter assumes a 128-bit return value and assigns to
      * the return value registers.
      */
     tcg_target_call_clobber_regs =
-        MAKE_64BIT_MASK(TCG_REG_R0, 64 / TCG_TARGET_REG_BITS);
+        MAKE_64BIT_MASK(TCG_REG_R0, 128 / TCG_TARGET_REG_BITS);
 
     s->reserved_regs = 0;
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 0/2] tcg/tci: Two regression fixes
  2023-06-07  5:46 [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson
  2023-06-07  5:46 ` [PATCH 1/2] tcg/tci: Adjust passing of MemOpIdx Richard Henderson
  2023-06-07  5:46 ` [PATCH 2/2] tcg/tci: Adjust call-clobbered regs for int128_t Richard Henderson
@ 2023-06-07  5:53 ` Richard Henderson
  2 siblings, 0 replies; 5+ messages in thread
From: Richard Henderson @ 2023-06-07  5:53 UTC (permalink / raw)
  To: qemu-devel; +Cc: sw

On 6/6/23 22:46, Richard Henderson wrote:
> Two recent regressions, both related to recent tcg changes.
> 
> Our CI does not test TCI with --enable-debug-tcg, which given timeout
> constraints is probably correct, but in this case resulted in an
> infinite loop on aarch64 multiarch/memory.c with FEAT_LSE2 enabled.

To expand on that: with --enable-debug-tcg, assertions fire and catch the bug; without the 
assertions, the generated bytecode is incorrect, which leads to the loop.


r~


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] tcg/tci: Adjust call-clobbered regs for int128_t
  2023-06-07  5:46 ` [PATCH 2/2] tcg/tci: Adjust call-clobbered regs for int128_t Richard Henderson
@ 2023-06-07  7:44   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 5+ messages in thread
From: Philippe Mathieu-Daudé @ 2023-06-07  7:44 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: sw

On 7/6/23 07:46, Richard Henderson wrote:
> We require either 2 or 4 registers to hold int128_t.
> Failure to do so results in a register allocation assert.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   tcg/tci/tcg-target.c.inc | 9 +++++----
>   1 file changed, 5 insertions(+), 4 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-06-07  7:44 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-07  5:46 [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson
2023-06-07  5:46 ` [PATCH 1/2] tcg/tci: Adjust passing of MemOpIdx Richard Henderson
2023-06-07  5:46 ` [PATCH 2/2] tcg/tci: Adjust call-clobbered regs for int128_t Richard Henderson
2023-06-07  7:44   ` Philippe Mathieu-Daudé
2023-06-07  5:53 ` [PATCH 0/2] tcg/tci: Two regression fixes Richard Henderson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.