All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/8] target/ppc: Move fixed-point insns to decodetree.
@ 2024-04-16  6:39 Chinmay Rath
  2024-04-16  6:39 ` [PATCH 1/8] target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions " Chinmay Rath
                   ` (7 more replies)
  0 siblings, 8 replies; 27+ messages in thread
From: Chinmay Rath @ 2024-04-16  6:39 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel, npiggin, danielhb413, richard.henderson, harshpb

This series moves all fixed-point instructions of the following types to
the decodetree specification: arithmetic, compare, trap, select and logical.

Chinmay Rath (8):
  target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions to decodetree.
  target/ppc: Make divw[u] handler method decodetree compatible.
  target/ppc: Move divw[u, e, eu] instructions to decodetree.
  target/ppc: Move neg, darn, mod{sw, uw} to decodetree.
  target/ppc: Move multiply fixed-point insns (64-bit operands) to
    decodetree.
  target/ppc: Move div/mod fixed-point insns (64 bits operands) to
    decodetree.
  target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to
    decodetree.
  target/ppc: Move logical fixed-point instructions to decodetree.

 target/ppc/helper.h                        |  26 +-
 target/ppc/insn32.decode                   |  93 +++
 target/ppc/excp_helper.c                   |   4 +-
 target/ppc/int_helper.c                    |  24 +-
 target/ppc/translate.c                     | 841 +--------------------
 target/ppc/translate/fixedpoint-impl.c.inc | 639 ++++++++++++++++
 6 files changed, 770 insertions(+), 857 deletions(-)

-- 
2.39.3



^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 1/8] target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions to decodetree.
  2024-04-16  6:39 [PATCH 0/8] target/ppc: Move fixed-point insns to decodetree Chinmay Rath
@ 2024-04-16  6:39 ` Chinmay Rath
  2024-04-16 17:56   ` Richard Henderson
  2024-04-16  6:39 ` [PATCH 2/8] target/ppc: Make divw[u] handler method decodetree compatible Chinmay Rath
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 27+ messages in thread
From: Chinmay Rath @ 2024-04-16  6:39 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel, npiggin, danielhb413, richard.henderson, harshpb

Moving the following instructions to decodetree specification :
	mulli                   	: D-form
	mul{lw, lwo, hw, hwu}[.]	: XO-form

The changes were verified by checking that the TCG ops generated for those
instructions, captured with the '-d in_asm,op' flag, remain the same.

Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
---
 target/ppc/insn32.decode                   |  9 +++
 target/ppc/translate.c                     | 89 ----------------------
 target/ppc/translate/fixedpoint-impl.c.inc | 71 +++++++++++++++++
 3 files changed, 80 insertions(+), 89 deletions(-)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index eada59f59f..0184680db8 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -193,6 +193,9 @@
 &XO_ta          rt ra oe:bool rc:bool
 @XO_ta          ...... rt:5 ra:5 ..... oe:1 ......... rc:1      &XO_ta
 
+&XO_tab_rc      rt ra rb rc:bool
+@XO_tab_rc      ...... rt:5 ra:5 rb:5 . ......... rc:1          &XO_tab_rc
+
 %xx_xt          0:1 21:5
 %xx_xb          1:1 11:5
 %xx_xa          2:1 16:5
@@ -353,6 +356,12 @@ SUBFE           011111 ..... ..... ..... . 010001000 .  @XO
 SUBFME          011111 ..... ..... ----- . 011101000 .  @XO_ta
 SUBFZE          011111 ..... ..... ----- . 011001000 .  @XO_ta
 
+MULLI           000111 ..... ..... ................     @D
+MULLW           011111 ..... ..... ..... 0 011101011 .  @XO_tab_rc
+MULLWO          011111 ..... ..... ..... 1 011101011 .  @XO_tab_rc
+MULHW           011111 ..... ..... ..... - 001001011 .  @XO_tab_rc
+MULHWU          011111 ..... ..... ..... - 000001011 .  @XO_tab_rc
+
 ## Fixed-Point Logical Instructions
 
 CFUGED          011111 ..... ..... ..... 0011011100 -   @X
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 93ffec787c..c45547a770 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -1948,90 +1948,6 @@ GEN_INT_ARITH_MODD(modud, 0x08, 0);
 GEN_INT_ARITH_MODD(modsd, 0x18, 1);
 #endif
 
-/* mulhw  mulhw. */
-static void gen_mulhw(DisasContext *ctx)
-{
-    TCGv_i32 t0 = tcg_temp_new_i32();
-    TCGv_i32 t1 = tcg_temp_new_i32();
-
-    tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
-    tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
-    tcg_gen_muls2_i32(t0, t1, t0, t1);
-    tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1);
-    if (unlikely(Rc(ctx->opcode) != 0)) {
-        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
-    }
-}
-
-/* mulhwu  mulhwu.  */
-static void gen_mulhwu(DisasContext *ctx)
-{
-    TCGv_i32 t0 = tcg_temp_new_i32();
-    TCGv_i32 t1 = tcg_temp_new_i32();
-
-    tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
-    tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
-    tcg_gen_mulu2_i32(t0, t1, t0, t1);
-    tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1);
-    if (unlikely(Rc(ctx->opcode) != 0)) {
-        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
-    }
-}
-
-/* mullw  mullw. */
-static void gen_mullw(DisasContext *ctx)
-{
-#if defined(TARGET_PPC64)
-    TCGv_i64 t0, t1;
-    t0 = tcg_temp_new_i64();
-    t1 = tcg_temp_new_i64();
-    tcg_gen_ext32s_tl(t0, cpu_gpr[rA(ctx->opcode)]);
-    tcg_gen_ext32s_tl(t1, cpu_gpr[rB(ctx->opcode)]);
-    tcg_gen_mul_i64(cpu_gpr[rD(ctx->opcode)], t0, t1);
-#else
-    tcg_gen_mul_i32(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
-                    cpu_gpr[rB(ctx->opcode)]);
-#endif
-    if (unlikely(Rc(ctx->opcode) != 0)) {
-        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
-    }
-}
-
-/* mullwo  mullwo. */
-static void gen_mullwo(DisasContext *ctx)
-{
-    TCGv_i32 t0 = tcg_temp_new_i32();
-    TCGv_i32 t1 = tcg_temp_new_i32();
-
-    tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
-    tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
-    tcg_gen_muls2_i32(t0, t1, t0, t1);
-#if defined(TARGET_PPC64)
-    tcg_gen_concat_i32_i64(cpu_gpr[rD(ctx->opcode)], t0, t1);
-#else
-    tcg_gen_mov_i32(cpu_gpr[rD(ctx->opcode)], t0);
-#endif
-
-    tcg_gen_sari_i32(t0, t0, 31);
-    tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1);
-    tcg_gen_extu_i32_tl(cpu_ov, t0);
-    if (is_isa300(ctx)) {
-        tcg_gen_mov_tl(cpu_ov32, cpu_ov);
-    }
-    tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
-
-    if (unlikely(Rc(ctx->opcode) != 0)) {
-        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
-    }
-}
-
-/* mulli */
-static void gen_mulli(DisasContext *ctx)
-{
-    tcg_gen_muli_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
-                    SIMM(ctx->opcode));
-}
-
 #if defined(TARGET_PPC64)
 /* mulhd  mulhd. */
 static void gen_mulhd(DisasContext *ctx)
@@ -6430,11 +6346,6 @@ GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x00600000, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x00000001, PPC_NONE, PPC2_ISA205),
 GEN_HANDLER_E(cmprb, 0x1F, 0x00, 0x06, 0x00400001, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x00000001, PPC_ISEL),
-GEN_HANDLER(mulhw, 0x1F, 0x0B, 0x02, 0x00000400, PPC_INTEGER),
-GEN_HANDLER(mulhwu, 0x1F, 0x0B, 0x00, 0x00000400, PPC_INTEGER),
-GEN_HANDLER(mullw, 0x1F, 0x0B, 0x07, 0x00000000, PPC_INTEGER),
-GEN_HANDLER(mullwo, 0x1F, 0x0B, 0x17, 0x00000000, PPC_INTEGER),
-GEN_HANDLER(mulli, 0x07, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
 #if defined(TARGET_PPC64)
 GEN_HANDLER(mulld, 0x1F, 0x09, 0x07, 0x00000000, PPC_64B),
 #endif
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc
index 0c66465d96..e12e533c67 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -395,6 +395,77 @@ TRANS(SUBFE, do_subf_XO, true, true)
 TRANS(SUBFME, do_subf_const_XO, tcg_constant_tl(-1LL), true, true)
 TRANS(SUBFZE, do_subf_const_XO, tcg_constant_tl(0), true, true)
 
+static bool trans_MULLI(DisasContext *ctx, arg_MULLI *a)
+{
+    tcg_gen_muli_tl(cpu_gpr[a->rt], cpu_gpr[a->ra], a->si);
+    return true;
+}
+
+static bool trans_MULLW(DisasContext *ctx, arg_MULLW *a)
+{
+#if defined(TARGET_PPC64)
+    TCGv_i64 t0, t1;
+    t0 = tcg_temp_new_i64();
+    t1 = tcg_temp_new_i64();
+    tcg_gen_ext32s_tl(t0, cpu_gpr[a->ra]);
+    tcg_gen_ext32s_tl(t1, cpu_gpr[a->rb]);
+    tcg_gen_mul_i64(cpu_gpr[a->rt], t0, t1);
+#else
+    tcg_gen_mul_i32(cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb]);
+#endif
+    if (unlikely(a->rc)) {
+        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
+    }
+    return true;
+}
+
+static bool trans_MULLWO(DisasContext *ctx, arg_MULLWO *a)
+{
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t1 = tcg_temp_new_i32();
+
+    tcg_gen_trunc_tl_i32(t0, cpu_gpr[a->ra]);
+    tcg_gen_trunc_tl_i32(t1, cpu_gpr[a->rb]);
+    tcg_gen_muls2_i32(t0, t1, t0, t1);
+#if defined(TARGET_PPC64)
+    tcg_gen_concat_i32_i64(cpu_gpr[a->rt], t0, t1);
+#else
+    tcg_gen_mov_i32(cpu_gpr[a->rt], t0);
+#endif
+
+    tcg_gen_sari_i32(t0, t0, 31);
+    tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1);
+    tcg_gen_extu_i32_tl(cpu_ov, t0);
+    if (is_isa300(ctx)) {
+        tcg_gen_mov_tl(cpu_ov32, cpu_ov);
+    }
+    tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
+
+    if (unlikely(a->rc)) {
+        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
+    }
+    return true;
+}
+
+static bool do_mulhw(DisasContext *ctx, arg_XO_tab_rc *a,
+                     void (*helper)(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1,
+                                    TCGv_i32 arg2))
+{
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    tcg_gen_trunc_tl_i32(t0, cpu_gpr[a->ra]);
+    tcg_gen_trunc_tl_i32(t1, cpu_gpr[a->rb]);
+    helper(t0, t1, t0, t1);
+    tcg_gen_extu_i32_tl(cpu_gpr[a->rt], t1);
+    if (unlikely(a->rc)) {
+        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
+    }
+    return true;
+}
+
+TRANS(MULHW, do_mulhw, tcg_gen_muls2_i32)
+TRANS(MULHWU, do_mulhw, tcg_gen_mulu2_i32)
+
 static bool trans_INVALID(DisasContext *ctx, arg_INVALID *a)
 {
     gen_invalid(ctx);
-- 
2.39.3



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 2/8] target/ppc: Make divw[u] handler method decodetree compatible.
  2024-04-16  6:39 [PATCH 0/8] target/ppc: Move fixed-point insns to decodetree Chinmay Rath
  2024-04-16  6:39 ` [PATCH 1/8] target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions " Chinmay Rath
@ 2024-04-16  6:39 ` Chinmay Rath
  2024-04-16 17:57   ` Richard Henderson
  2024-04-16  6:39 ` [PATCH 3/8] target/ppc: Move divw[u, e, eu] instructions to decodetree Chinmay Rath
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 27+ messages in thread
From: Chinmay Rath @ 2024-04-16  6:39 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel, npiggin, danielhb413, richard.henderson, harshpb

The handler methods for divw[u] instructions internally use Rc(ctx->opcode)
to extract the Rc field of the instruction, which poses a problem if we move
these instructions to decodetree, as the ctx->opcode field is not
populated in decodetree. Hence, make the handler decodetree compatible, so
that the mentioned insns can be safely moved to decodetree specs.

Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
---
 target/ppc/translate.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index c45547a770..be7d807e3c 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -1738,8 +1738,9 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
     }
 }
 
-static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1,
-                                     TCGv arg2, int sign, int compute_ov)
+static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret,
+                                     TCGv arg1, TCGv arg2, bool sign,
+                                     bool compute_ov, bool compute_rc0)
 {
     TCGv_i32 t0 = tcg_temp_new_i32();
     TCGv_i32 t1 = tcg_temp_new_i32();
@@ -1773,7 +1774,7 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1,
         tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
     }
 
-    if (unlikely(Rc(ctx->opcode) != 0)) {
+    if (unlikely(compute_rc0)) {
         gen_set_Rc0(ctx, ret);
     }
 }
@@ -1783,7 +1784,7 @@ static void glue(gen_, name)(DisasContext *ctx)                               \
 {                                                                             \
     gen_op_arith_divw(ctx, cpu_gpr[rD(ctx->opcode)],                          \
                      cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)],      \
-                     sign, compute_ov);                                       \
+                     sign, compute_ov, Rc(ctx->opcode));                      \
 }
 /* divwu  divwu.  divwuo  divwuo.   */
 GEN_INT_ARITH_DIVW(divwu, 0x0E, 0, 0);
-- 
2.39.3



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 3/8] target/ppc: Move divw[u, e, eu] instructions to decodetree.
  2024-04-16  6:39 [PATCH 0/8] target/ppc: Move fixed-point insns to decodetree Chinmay Rath
  2024-04-16  6:39 ` [PATCH 1/8] target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions " Chinmay Rath
  2024-04-16  6:39 ` [PATCH 2/8] target/ppc: Make divw[u] handler method decodetree compatible Chinmay Rath
@ 2024-04-16  6:39 ` Chinmay Rath
  2024-04-16 18:19   ` Richard Henderson
  2024-04-16  6:39 ` [PATCH 4/8] target/ppc: Move neg, darn, mod{sw, uw} " Chinmay Rath
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 27+ messages in thread
From: Chinmay Rath @ 2024-04-16  6:39 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel, npiggin, danielhb413, richard.henderson, harshpb

Moving the following instructions to decodetree specification :
	 divw[u, e, eu][o][.] 	: XO-form

The changes were verified by checking that the TCG ops generated for those
instructions, captured with the '-d in_asm,op' flag, remain the same.

Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
---
 target/ppc/helper.h                        |  4 +--
 target/ppc/insn32.decode                   |  5 ++++
 target/ppc/int_helper.c                    |  4 +--
 target/ppc/translate.c                     | 31 ----------------------
 target/ppc/translate/fixedpoint-impl.c.inc | 24 +++++++++++++++++
 5 files changed, 33 insertions(+), 35 deletions(-)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 86f97ee1e7..1fc8b7c5fd 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -55,8 +55,8 @@ DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32)
 DEF_HELPER_4(divdeu, i64, env, i64, i64, i32)
 DEF_HELPER_4(divde, i64, env, i64, i64, i32)
 #endif
-DEF_HELPER_4(divweu, tl, env, tl, tl, i32)
-DEF_HELPER_4(divwe, tl, env, tl, tl, i32)
+DEF_HELPER_4(DIVWEU, tl, env, tl, tl, i32)
+DEF_HELPER_4(DIVWE, tl, env, tl, tl, i32)
 
 DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 0184680db8..bfccebd9a7 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -362,6 +362,11 @@ MULLWO          011111 ..... ..... ..... 1 011101011 .  @XO_tab_rc
 MULHW           011111 ..... ..... ..... - 001001011 .  @XO_tab_rc
 MULHWU          011111 ..... ..... ..... - 000001011 .  @XO_tab_rc
 
+DIVW            011111 ..... ..... ..... . 111101011 .  @XO
+DIVWU           011111 ..... ..... ..... . 111001011 .  @XO
+DIVWE           011111 ..... ..... ..... . 110101011 .  @XO
+DIVWEU          011111 ..... ..... ..... . 110001011 .  @XO
+
 ## Fixed-Point Logical Instructions
 
 CFUGED          011111 ..... ..... ..... 0011011100 -   @X
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 0a5c3e78a4..dc1f72ff38 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -44,7 +44,7 @@ static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
     }
 }
 
-target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
+target_ulong helper_DIVWEU(CPUPPCState *env, target_ulong ra, target_ulong rb,
                            uint32_t oe)
 {
     uint64_t rt = 0;
@@ -71,7 +71,7 @@ target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
     return (target_ulong)rt;
 }
 
-target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
+target_ulong helper_DIVWE(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
 {
     int64_t rt = 0;
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index be7d807e3c..0a1d1d63b3 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -1778,21 +1778,6 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret,
         gen_set_Rc0(ctx, ret);
     }
 }
-/* Div functions */
-#define GEN_INT_ARITH_DIVW(name, opc3, sign, compute_ov)                      \
-static void glue(gen_, name)(DisasContext *ctx)                               \
-{                                                                             \
-    gen_op_arith_divw(ctx, cpu_gpr[rD(ctx->opcode)],                          \
-                     cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)],      \
-                     sign, compute_ov, Rc(ctx->opcode));                      \
-}
-/* divwu  divwu.  divwuo  divwuo.   */
-GEN_INT_ARITH_DIVW(divwu, 0x0E, 0, 0);
-GEN_INT_ARITH_DIVW(divwuo, 0x1E, 0, 1);
-/* divw  divw.  divwo  divwo.   */
-GEN_INT_ARITH_DIVW(divw, 0x0F, 1, 0);
-GEN_INT_ARITH_DIVW(divwo, 0x1F, 1, 1);
-
 /* div[wd]eu[o][.] */
 #define GEN_DIVE(name, hlpr, compute_ov)                                      \
 static void gen_##name(DisasContext *ctx)                                     \
@@ -1805,11 +1790,6 @@ static void gen_##name(DisasContext *ctx)                                     \
     }                                                                         \
 }
 
-GEN_DIVE(divweu, divweu, 0);
-GEN_DIVE(divweuo, divweu, 1);
-GEN_DIVE(divwe, divwe, 0);
-GEN_DIVE(divweo, divwe, 1);
-
 #if defined(TARGET_PPC64)
 static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1,
                                      TCGv arg2, int sign, int compute_ov)
@@ -6562,17 +6542,6 @@ GEN_HANDLER_E(maddhd_maddhdu, 0x04, 0x18, 0xFF, 0x00000000, PPC_NONE,
 GEN_HANDLER_E(maddld, 0x04, 0x19, 0xFF, 0x00000000, PPC_NONE, PPC2_ISA300),
 #endif
 
-#undef GEN_INT_ARITH_DIVW
-#define GEN_INT_ARITH_DIVW(name, opc3, sign, compute_ov)                      \
-GEN_HANDLER(name, 0x1F, 0x0B, opc3, 0x00000000, PPC_INTEGER)
-GEN_INT_ARITH_DIVW(divwu, 0x0E, 0, 0),
-GEN_INT_ARITH_DIVW(divwuo, 0x1E, 0, 1),
-GEN_INT_ARITH_DIVW(divw, 0x0F, 1, 0),
-GEN_INT_ARITH_DIVW(divwo, 0x1F, 1, 1),
-GEN_HANDLER_E(divwe, 0x1F, 0x0B, 0x0D, 0, PPC_NONE, PPC2_DIVE_ISA206),
-GEN_HANDLER_E(divweo, 0x1F, 0x0B, 0x1D, 0, PPC_NONE, PPC2_DIVE_ISA206),
-GEN_HANDLER_E(divweu, 0x1F, 0x0B, 0x0C, 0, PPC_NONE, PPC2_DIVE_ISA206),
-GEN_HANDLER_E(divweuo, 0x1F, 0x0B, 0x1C, 0, PPC_NONE, PPC2_DIVE_ISA206),
 GEN_HANDLER_E(modsw, 0x1F, 0x0B, 0x18, 0x00000001, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(moduw, 0x1F, 0x0B, 0x08, 0x00000001, PPC_NONE, PPC2_ISA300),
 
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc
index e12e533c67..3f787bbeaa 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -466,6 +466,30 @@ static bool do_mulhw(DisasContext *ctx, arg_XO_tab_rc *a,
 TRANS(MULHW, do_mulhw, tcg_gen_muls2_i32)
 TRANS(MULHWU, do_mulhw, tcg_gen_mulu2_i32)
 
+static bool do_divw(DisasContext *ctx, arg_XO *a, int sign)
+{
+    gen_op_arith_divw(ctx, cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb],
+                      sign, a->oe, a->rc);
+    return true;
+}
+
+static bool do_divwe(DisasContext *ctx, arg_XO *a,
+                     void (*helper)(TCGv, TCGv_ptr, TCGv, TCGv, TCGv_i32))
+{
+    REQUIRE_INSNS_FLAGS2(ctx, DIVE_ISA206);
+    helper(cpu_gpr[a->rt], tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb],
+           tcg_constant_i32(a->oe));
+    if (unlikely(a->rc)) {
+        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
+    }
+    return true;
+}
+
+TRANS(DIVW, do_divw, 1);
+TRANS(DIVWU, do_divw, 0);
+TRANS(DIVWE, do_divwe, gen_helper_DIVWE);
+TRANS(DIVWEU, do_divwe, gen_helper_DIVWEU);
+
 static bool trans_INVALID(DisasContext *ctx, arg_INVALID *a)
 {
     gen_invalid(ctx);
-- 
2.39.3



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 4/8] target/ppc: Move neg, darn, mod{sw, uw} to decodetree.
  2024-04-16  6:39 [PATCH 0/8] target/ppc: Move fixed-point insns to decodetree Chinmay Rath
                   ` (2 preceding siblings ...)
  2024-04-16  6:39 ` [PATCH 3/8] target/ppc: Move divw[u, e, eu] instructions to decodetree Chinmay Rath
@ 2024-04-16  6:39 ` Chinmay Rath
  2024-04-16 18:25   ` Richard Henderson
  2024-04-16  6:39 ` [PATCH 5/8] target/ppc: Move multiply fixed-point insns (64-bit operands) " Chinmay Rath
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 27+ messages in thread
From: Chinmay Rath @ 2024-04-16  6:39 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel, npiggin, danielhb413, richard.henderson, harshpb

Moving the below instructions to decodetree specification :

	neg[o][.]       	: XO-form
	mod{sw, uw}, darn	: X-form

The changes were verified by checking that the TCG ops generated for those
instructions, captured with the '-d in_asm,op' flag, remain the same.

Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
---
 target/ppc/helper.h                        |  4 +-
 target/ppc/insn32.decode                   |  8 ++++
 target/ppc/int_helper.c                    |  4 +-
 target/ppc/translate.c                     | 56 ----------------------
 target/ppc/translate/fixedpoint-impl.c.inc | 44 +++++++++++++++++
 5 files changed, 56 insertions(+), 60 deletions(-)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 1fc8b7c5fd..09d0b0074b 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -71,8 +71,8 @@ DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl)
 DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_3(srad, tl, env, tl, tl)
-DEF_HELPER_FLAGS_0(darn32, TCG_CALL_NO_RWG, tl)
-DEF_HELPER_FLAGS_0(darn64, TCG_CALL_NO_RWG, tl)
+DEF_HELPER_FLAGS_0(DARN32, TCG_CALL_NO_RWG, tl)
+DEF_HELPER_FLAGS_0(DARN64, TCG_CALL_NO_RWG, tl)
 #endif
 
 DEF_HELPER_FLAGS_1(cntlsw32, TCG_CALL_NO_RWG_SE, i32, i32)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index bfccebd9a7..654f55471b 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -187,6 +187,9 @@
 &X_a            ra
 @X_a            ...... ra:3 .. ..... ..... .......... .         &X_a
 
+&X_tl           rt l
+@X_tl           ...... rt:5 ... l:2 ..... .......... .          &X_tl
+
 &XO             rt ra rb oe:bool rc:bool
 @XO             ...... rt:5 ra:5 rb:5 oe:1 ......... rc:1       &XO
 
@@ -367,6 +370,11 @@ DIVWU           011111 ..... ..... ..... . 111001011 .  @XO
 DIVWE           011111 ..... ..... ..... . 110101011 .  @XO
 DIVWEU          011111 ..... ..... ..... . 110001011 .  @XO
 
+MODSW           011111 ..... ..... ..... 1100001011 -   @X
+MODUW           011111 ..... ..... ..... 0100001011 -   @X
+DARN            011111 ..... --- .. ----- 1011110011 -  @X_tl
+NEG             011111 ..... ..... ----- . 001101000 .  @XO_ta
+
 ## Fixed-Point Logical Instructions
 
 CFUGED          011111 ..... ..... ..... 0011011100 -   @X
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index dc1f72ff38..bc25d5b062 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -171,7 +171,7 @@ uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
 /*
  * Return a random number.
  */
-uint64_t helper_darn32(void)
+uint64_t helper_DARN32(void)
 {
     Error *err = NULL;
     uint32_t ret;
@@ -186,7 +186,7 @@ uint64_t helper_darn32(void)
     return ret;
 }
 
-uint64_t helper_darn64(void)
+uint64_t helper_DARN64(void)
 {
     Error *err = NULL;
     uint64_t ret;
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 0a1d1d63b3..436fcfc645 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -1878,17 +1878,6 @@ static inline void gen_op_arith_modw(DisasContext *ctx, TCGv ret, TCGv arg1,
     }
 }
 
-#define GEN_INT_ARITH_MODW(name, opc3, sign)                                \
-static void glue(gen_, name)(DisasContext *ctx)                             \
-{                                                                           \
-    gen_op_arith_modw(ctx, cpu_gpr[rD(ctx->opcode)],                        \
-                      cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)],   \
-                      sign);                                                \
-}
-
-GEN_INT_ARITH_MODW(moduw, 0x08, 0);
-GEN_INT_ARITH_MODW(modsw, 0x18, 1);
-
 #if defined(TARGET_PPC64)
 static inline void gen_op_arith_modd(DisasContext *ctx, TCGv ret, TCGv arg1,
                                      TCGv arg2, int sign)
@@ -2055,27 +2044,6 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1,
     }
 }
 
-/* neg neg. nego nego. */
-static inline void gen_op_arith_neg(DisasContext *ctx, bool compute_ov)
-{
-    TCGv zero = tcg_constant_tl(0);
-    gen_op_arith_subf(ctx, cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
-                      zero, 0, 0, compute_ov, Rc(ctx->opcode));
-}
-
-static void gen_neg(DisasContext *ctx)
-{
-    tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
-    if (unlikely(Rc(ctx->opcode))) {
-        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
-    }
-}
-
-static void gen_nego(DisasContext *ctx)
-{
-    gen_op_arith_neg(ctx, 1);
-}
-
 /***                            Integer logical                            ***/
 #define GEN_LOGICAL2(name, tcg_op, opc, type)                                 \
 static void glue(gen_, name)(DisasContext *ctx)                               \
@@ -2401,24 +2369,6 @@ static void gen_cnttzd(DisasContext *ctx)
         gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
     }
 }
-
-/* darn */
-static void gen_darn(DisasContext *ctx)
-{
-    int l = L(ctx->opcode);
-
-    if (l > 2) {
-        tcg_gen_movi_i64(cpu_gpr[rD(ctx->opcode)], -1);
-    } else {
-        translator_io_start(&ctx->base);
-        if (l == 0) {
-            gen_helper_darn32(cpu_gpr[rD(ctx->opcode)]);
-        } else {
-            /* Return 64-bit random for both CRN and RRN */
-            gen_helper_darn64(cpu_gpr[rD(ctx->opcode)]);
-        }
-    }
-}
 #endif
 
 /***                             Integer rotate                            ***/
@@ -6330,8 +6280,6 @@ GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x00000001, PPC_ISEL),
 #if defined(TARGET_PPC64)
 GEN_HANDLER(mulld, 0x1F, 0x09, 0x07, 0x00000000, PPC_64B),
 #endif
-GEN_HANDLER(neg, 0x1F, 0x08, 0x03, 0x0000F800, PPC_INTEGER),
-GEN_HANDLER(nego, 0x1F, 0x08, 0x13, 0x0000F800, PPC_INTEGER),
 GEN_HANDLER2(andi_, "andi.", 0x1C, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
 GEN_HANDLER2(andis_, "andis.", 0x1D, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
 GEN_HANDLER(cntlzw, 0x1F, 0x1A, 0x00, 0x00000000, PPC_INTEGER),
@@ -6352,7 +6300,6 @@ GEN_HANDLER_E(prtyw, 0x1F, 0x1A, 0x04, 0x0000F801, PPC_NONE, PPC2_ISA205),
 GEN_HANDLER(popcntd, 0x1F, 0x1A, 0x0F, 0x0000F801, PPC_POPCNTWD),
 GEN_HANDLER(cntlzd, 0x1F, 0x1A, 0x01, 0x00000000, PPC_64B),
 GEN_HANDLER_E(cnttzd, 0x1F, 0x1A, 0x11, 0x00000000, PPC_NONE, PPC2_ISA300),
-GEN_HANDLER_E(darn, 0x1F, 0x13, 0x17, 0x001CF801, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(prtyd, 0x1F, 0x1A, 0x05, 0x0000F801, PPC_NONE, PPC2_ISA205),
 GEN_HANDLER_E(bpermd, 0x1F, 0x1C, 0x07, 0x00000001, PPC_NONE, PPC2_PERM_ISA206),
 #endif
@@ -6542,9 +6489,6 @@ GEN_HANDLER_E(maddhd_maddhdu, 0x04, 0x18, 0xFF, 0x00000000, PPC_NONE,
 GEN_HANDLER_E(maddld, 0x04, 0x19, 0xFF, 0x00000000, PPC_NONE, PPC2_ISA300),
 #endif
 
-GEN_HANDLER_E(modsw, 0x1F, 0x0B, 0x18, 0x00000001, PPC_NONE, PPC2_ISA300),
-GEN_HANDLER_E(moduw, 0x1F, 0x0B, 0x08, 0x00000001, PPC_NONE, PPC2_ISA300),
-
 #if defined(TARGET_PPC64)
 #undef GEN_INT_ARITH_DIVD
 #define GEN_INT_ARITH_DIVD(name, opc3, sign, compute_ov)                      \
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc
index 3f787bbeaa..2dfd6bea57 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -490,6 +490,50 @@ TRANS(DIVWU, do_divw, 0);
 TRANS(DIVWE, do_divwe, gen_helper_DIVWE);
 TRANS(DIVWEU, do_divwe, gen_helper_DIVWEU);
 
+static bool do_modw(DisasContext *ctx, arg_X *a, bool sign)
+{
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+    gen_op_arith_modw(ctx, cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb],
+                      sign);
+    return true;
+}
+
+TRANS(MODUW, do_modw, false);
+TRANS(MODSW, do_modw, true);
+
+static bool trans_NEG(DisasContext *ctx, arg_NEG *a)
+{
+    if (a->oe) {
+        TCGv zero = tcg_constant_tl(0);
+        gen_op_arith_subf(ctx, cpu_gpr[a->rt], cpu_gpr[a->ra], zero,
+                          false, false, true, a->rc);
+    } else {
+        tcg_gen_neg_tl(cpu_gpr[a->rt], cpu_gpr[a->ra]);
+        if (unlikely(a->rc)) {
+            gen_set_Rc0(ctx, cpu_gpr[a->rt]);
+        }
+    }
+    return true;
+}
+
+static bool trans_DARN(DisasContext *ctx, arg_DARN *a)
+{
+    REQUIRE_64BIT(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+    if (a->l > 2) {
+        tcg_gen_movi_i64(cpu_gpr[a->rt], -1);
+    } else {
+        translator_io_start(&ctx->base);
+        if (a->l == 0) {
+            gen_helper_DARN32(cpu_gpr[a->rt]);
+        } else {
+            /* Return 64-bit random for both CRN and RRN */
+            gen_helper_DARN64(cpu_gpr[a->rt]);
+        }
+    }
+    return true;
+}
+
 static bool trans_INVALID(DisasContext *ctx, arg_INVALID *a)
 {
     gen_invalid(ctx);
-- 
2.39.3



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 5/8] target/ppc: Move multiply fixed-point insns (64-bit operands) to decodetree.
  2024-04-16  6:39 [PATCH 0/8] target/ppc: Move fixed-point insns to decodetree Chinmay Rath
                   ` (3 preceding siblings ...)
  2024-04-16  6:39 ` [PATCH 4/8] target/ppc: Move neg, darn, mod{sw, uw} " Chinmay Rath
@ 2024-04-16  6:39 ` Chinmay Rath
  2024-04-16 18:36   ` Richard Henderson
  2024-04-16  6:39 ` [PATCH 6/8] target/ppc: Move div/mod fixed-point insns (64 bits " Chinmay Rath
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 27+ messages in thread
From: Chinmay Rath @ 2024-04-16  6:39 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel, npiggin, danielhb413, richard.henderson, harshpb

Moving the following instructions to decodetree :

	mul{ld, ldo, hd, hdu}[.]	: XO-form
	madd{hd, hdu, ld}		: VA-form

The changes were verified by checking that the TCG ops generated for those
instructions, captured with the '-d in_asm,op' flag, remain the same.

Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
---
 target/ppc/insn32.decode                   |   9 ++
 target/ppc/translate.c                     | 101 ---------------------
 target/ppc/translate/fixedpoint-impl.c.inc |  85 +++++++++++++++++
 3 files changed, 94 insertions(+), 101 deletions(-)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 654f55471b..61c59bbde0 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -375,6 +375,15 @@ MODUW           011111 ..... ..... ..... 0100001011 -   @X
 DARN            011111 ..... --- .. ----- 1011110011 -  @X_tl
 NEG             011111 ..... ..... ----- . 001101000 .  @XO_ta
 
+MULLD           011111 ..... ..... ..... 0 011101001 .  @XO_tab_rc
+MULLDO          011111 ..... ..... ..... 1 011101001 .  @XO_tab_rc
+MULHD           011111 ..... ..... ..... - 001001001 .  @XO_tab_rc
+MULHDU          011111 ..... ..... ..... - 000001001 .  @XO_tab_rc
+
+MADDLD          000100 ..... ..... ..... ..... 110011   @VA
+MADDHD          000100 ..... ..... ..... ..... 110000   @VA
+MADDHDU         000100 ..... ..... ..... ..... 110001   @VA
+
 ## Fixed-Point Logical Instructions
 
 CFUGED          011111 ..... ..... ..... 0011011100 -   @X
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 436fcfc645..8fa125d0ae 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -1918,62 +1918,6 @@ GEN_INT_ARITH_MODD(modud, 0x08, 0);
 GEN_INT_ARITH_MODD(modsd, 0x18, 1);
 #endif
 
-#if defined(TARGET_PPC64)
-/* mulhd  mulhd. */
-static void gen_mulhd(DisasContext *ctx)
-{
-    TCGv lo = tcg_temp_new();
-    tcg_gen_muls2_tl(lo, cpu_gpr[rD(ctx->opcode)],
-                     cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
-    if (unlikely(Rc(ctx->opcode) != 0)) {
-        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
-    }
-}
-
-/* mulhdu  mulhdu. */
-static void gen_mulhdu(DisasContext *ctx)
-{
-    TCGv lo = tcg_temp_new();
-    tcg_gen_mulu2_tl(lo, cpu_gpr[rD(ctx->opcode)],
-                     cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
-    if (unlikely(Rc(ctx->opcode) != 0)) {
-        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
-    }
-}
-
-/* mulld  mulld. */
-static void gen_mulld(DisasContext *ctx)
-{
-    tcg_gen_mul_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
-                   cpu_gpr[rB(ctx->opcode)]);
-    if (unlikely(Rc(ctx->opcode) != 0)) {
-        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
-    }
-}
-
-/* mulldo  mulldo. */
-static void gen_mulldo(DisasContext *ctx)
-{
-    TCGv_i64 t0 = tcg_temp_new_i64();
-    TCGv_i64 t1 = tcg_temp_new_i64();
-
-    tcg_gen_muls2_i64(t0, t1, cpu_gpr[rA(ctx->opcode)],
-                      cpu_gpr[rB(ctx->opcode)]);
-    tcg_gen_mov_i64(cpu_gpr[rD(ctx->opcode)], t0);
-
-    tcg_gen_sari_i64(t0, t0, 63);
-    tcg_gen_setcond_i64(TCG_COND_NE, cpu_ov, t0, t1);
-    if (is_isa300(ctx)) {
-        tcg_gen_mov_tl(cpu_ov32, cpu_ov);
-    }
-    tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
-
-    if (unlikely(Rc(ctx->opcode) != 0)) {
-        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
-    }
-}
-#endif
-
 /* Common subf function */
 static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1,
                                      TCGv arg2, bool add_ca, bool compute_ca,
@@ -5884,36 +5828,6 @@ static void gen_icbt_440(DisasContext *ctx)
      */
 }
 
-#if defined(TARGET_PPC64)
-static void gen_maddld(DisasContext *ctx)
-{
-    TCGv_i64 t1 = tcg_temp_new_i64();
-
-    tcg_gen_mul_i64(t1, cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
-    tcg_gen_add_i64(cpu_gpr[rD(ctx->opcode)], t1, cpu_gpr[rC(ctx->opcode)]);
-}
-
-/* maddhd maddhdu */
-static void gen_maddhd_maddhdu(DisasContext *ctx)
-{
-    TCGv_i64 lo = tcg_temp_new_i64();
-    TCGv_i64 hi = tcg_temp_new_i64();
-    TCGv_i64 t1 = tcg_temp_new_i64();
-
-    if (Rc(ctx->opcode)) {
-        tcg_gen_mulu2_i64(lo, hi, cpu_gpr[rA(ctx->opcode)],
-                          cpu_gpr[rB(ctx->opcode)]);
-        tcg_gen_movi_i64(t1, 0);
-    } else {
-        tcg_gen_muls2_i64(lo, hi, cpu_gpr[rA(ctx->opcode)],
-                          cpu_gpr[rB(ctx->opcode)]);
-        tcg_gen_sari_i64(t1, cpu_gpr[rC(ctx->opcode)], 63);
-    }
-    tcg_gen_add2_i64(t1, cpu_gpr[rD(ctx->opcode)], lo, hi,
-                     cpu_gpr[rC(ctx->opcode)], t1);
-}
-#endif /* defined(TARGET_PPC64) */
-
 static void gen_tbegin(DisasContext *ctx)
 {
     if (unlikely(!ctx->tm_enabled)) {
@@ -6277,9 +6191,6 @@ GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x00600000, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x00000001, PPC_NONE, PPC2_ISA205),
 GEN_HANDLER_E(cmprb, 0x1F, 0x00, 0x06, 0x00400001, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x00000001, PPC_ISEL),
-#if defined(TARGET_PPC64)
-GEN_HANDLER(mulld, 0x1F, 0x09, 0x07, 0x00000000, PPC_64B),
-#endif
 GEN_HANDLER2(andi_, "andi.", 0x1C, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
 GEN_HANDLER2(andis_, "andis.", 0x1D, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
 GEN_HANDLER(cntlzw, 0x1F, 0x1A, 0x00, 0x00000000, PPC_INTEGER),
@@ -6483,11 +6394,6 @@ GEN_HANDLER(lvsl, 0x1f, 0x06, 0x00, 0x00000001, PPC_ALTIVEC),
 GEN_HANDLER(lvsr, 0x1f, 0x06, 0x01, 0x00000001, PPC_ALTIVEC),
 GEN_HANDLER(mfvscr, 0x04, 0x2, 0x18, 0x001ff800, PPC_ALTIVEC),
 GEN_HANDLER(mtvscr, 0x04, 0x2, 0x19, 0x03ff0000, PPC_ALTIVEC),
-#if defined(TARGET_PPC64)
-GEN_HANDLER_E(maddhd_maddhdu, 0x04, 0x18, 0xFF, 0x00000000, PPC_NONE,
-              PPC2_ISA300),
-GEN_HANDLER_E(maddld, 0x04, 0x19, 0xFF, 0x00000000, PPC_NONE, PPC2_ISA300),
-#endif
 
 #if defined(TARGET_PPC64)
 #undef GEN_INT_ARITH_DIVD
@@ -6504,13 +6410,6 @@ GEN_HANDLER_E(divde, 0x1F, 0x09, 0x0D, 0, PPC_NONE, PPC2_DIVE_ISA206),
 GEN_HANDLER_E(divdeo, 0x1F, 0x09, 0x1D, 0, PPC_NONE, PPC2_DIVE_ISA206),
 GEN_HANDLER_E(modsd, 0x1F, 0x09, 0x18, 0x00000001, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(modud, 0x1F, 0x09, 0x08, 0x00000001, PPC_NONE, PPC2_ISA300),
-
-#undef GEN_INT_ARITH_MUL_HELPER
-#define GEN_INT_ARITH_MUL_HELPER(name, opc3)                                  \
-GEN_HANDLER(name, 0x1F, 0x09, opc3, 0x00000000, PPC_64B)
-GEN_INT_ARITH_MUL_HELPER(mulhdu, 0x00),
-GEN_INT_ARITH_MUL_HELPER(mulhd, 0x02),
-GEN_INT_ARITH_MUL_HELPER(mulldo, 0x17),
 #endif
 
 #undef GEN_LOGICAL1
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc
index 2dfd6bea57..561fb2d94c 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -534,6 +534,91 @@ static bool trans_DARN(DisasContext *ctx, arg_DARN *a)
     return true;
 }
 
+static bool trans_MULLD(DisasContext *ctx, arg_MULLD *a)
+{
+    REQUIRE_64BIT(ctx);
+    tcg_gen_mul_tl(cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb]);
+    if (unlikely(a->rc)) {
+        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
+    }
+    return true;
+}
+
+static bool trans_MULLDO(DisasContext *ctx, arg_MULLD *a)
+{
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+
+    REQUIRE_64BIT(ctx);
+    tcg_gen_muls2_i64(t0, t1, cpu_gpr[a->ra], cpu_gpr[a->rb]);
+    tcg_gen_mov_i64(cpu_gpr[a->rt], t0);
+
+    tcg_gen_sari_i64(t0, t0, 63);
+    tcg_gen_setcond_i64(TCG_COND_NE, cpu_ov, t0, t1);
+    if (is_isa300(ctx)) {
+        tcg_gen_mov_tl(cpu_ov32, cpu_ov);
+    }
+    tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
+
+    if (unlikely(a->rc)) {
+        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
+    }
+    return true;
+}
+
+static bool do_mulhd(DisasContext *ctx, arg_XO_tab_rc *a,
+                     void (*helper)(TCGv, TCGv, TCGv, TCGv))
+{
+    TCGv lo = tcg_temp_new();
+    helper(lo, cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb]);
+    if (unlikely(a->rc)) {
+        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
+    }
+    return true;
+}
+
+TRANS64(MULHD, do_mulhd, tcg_gen_muls2_tl);
+TRANS64(MULHDU, do_mulhd, tcg_gen_mulu2_tl);
+
+static bool trans_MADDLD(DisasContext *ctx, arg_MADDLD *a)
+{
+    TCGv_i64 t1 = tcg_temp_new_i64();
+
+    REQUIRE_64BIT(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+    tcg_gen_mul_i64(t1, cpu_gpr[a->vra], cpu_gpr[a->vrb]);
+    tcg_gen_add_i64(cpu_gpr[a->vrt], t1, cpu_gpr[a->rc]);
+    return true;
+}
+
+static bool trans_MADDHD(DisasContext *ctx, arg_MADDHD *a)
+{
+    TCGv_i64 lo = tcg_temp_new_i64();
+    TCGv_i64 hi = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+
+    REQUIRE_64BIT(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+    tcg_gen_muls2_i64(lo, hi, cpu_gpr[a->vra], cpu_gpr[a->vrb]);
+    tcg_gen_sari_i64(t1, cpu_gpr[a->rc], 63);
+    tcg_gen_add2_i64(t1, cpu_gpr[a->vrt], lo, hi, cpu_gpr[a->rc], t1);
+    return true;
+}
+
+static bool trans_MADDHDU(DisasContext *ctx, arg_MADDHDU *a)
+{
+    TCGv_i64 lo = tcg_temp_new_i64();
+    TCGv_i64 hi = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+
+    REQUIRE_64BIT(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+    tcg_gen_mulu2_i64(lo, hi, cpu_gpr[a->vra], cpu_gpr[a->vrb]);
+    tcg_gen_movi_i64(t1, 0);
+    tcg_gen_add2_i64(t1, cpu_gpr[a->vrt], lo, hi, cpu_gpr[a->rc], t1);
+    return true;
+}
+
 static bool trans_INVALID(DisasContext *ctx, arg_INVALID *a)
 {
     gen_invalid(ctx);
-- 
2.39.3



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 6/8] target/ppc: Move div/mod fixed-point insns (64 bits operands) to decodetree.
  2024-04-16  6:39 [PATCH 0/8] target/ppc: Move fixed-point insns to decodetree Chinmay Rath
                   ` (4 preceding siblings ...)
  2024-04-16  6:39 ` [PATCH 5/8] target/ppc: Move multiply fixed-point insns (64-bit operands) " Chinmay Rath
@ 2024-04-16  6:39 ` Chinmay Rath
  2024-04-16 18:38   ` Richard Henderson
  2024-04-16  6:39 ` [PATCH 7/8] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions " Chinmay Rath
  2024-04-16  6:39 ` [PATCH 8/8] target/ppc: Move logical fixed-point " Chinmay Rath
  7 siblings, 1 reply; 27+ messages in thread
From: Chinmay Rath @ 2024-04-16  6:39 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel, npiggin, danielhb413, richard.henderson, harshpb

Moving the below instructions to decodetree specification :

	divd[u, e, eu][o][.]	: XO-form
	mod{sd, ud}		: X-form

With this patch, all the fixed-point arithmetic instructions have been
moved to decodetree.
The changes were verified by validating that the tcg ops generated by those
instructions remain the same, which were captured using the '-d in_asm,op' flag.
Also, renamed do_divwe method in fixedpoint-impl.c.inc to do_dive because it is
now used to divide doubleword operands as well, and not just words.

Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
---
 target/ppc/helper.h                        |  4 +-
 target/ppc/insn32.decode                   |  8 +++
 target/ppc/int_helper.c                    |  4 +-
 target/ppc/translate.c                     | 65 ++--------------------
 target/ppc/translate/fixedpoint-impl.c.inc | 29 +++++++++-
 5 files changed, 42 insertions(+), 68 deletions(-)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 09d0b0074b..e862bdceaf 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -52,8 +52,8 @@ DEF_HELPER_FLAGS_2(icbiep, TCG_CALL_NO_WG, void, env, tl)
 DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32)
 
 #if defined(TARGET_PPC64)
-DEF_HELPER_4(divdeu, i64, env, i64, i64, i32)
-DEF_HELPER_4(divde, i64, env, i64, i64, i32)
+DEF_HELPER_4(DIVDEU, i64, env, i64, i64, i32)
+DEF_HELPER_4(DIVDE, i64, env, i64, i64, i32)
 #endif
 DEF_HELPER_4(DIVWEU, tl, env, tl, tl, i32)
 DEF_HELPER_4(DIVWE, tl, env, tl, tl, i32)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 61c59bbde0..509961023b 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -384,6 +384,14 @@ MADDLD          000100 ..... ..... ..... ..... 110011   @VA
 MADDHD          000100 ..... ..... ..... ..... 110000   @VA
 MADDHDU         000100 ..... ..... ..... ..... 110001   @VA
 
+DIVD            011111 ..... ..... ..... . 111101001 .  @XO
+DIVDU           011111 ..... ..... ..... . 111001001 .  @XO
+DIVDE           011111 ..... ..... ..... . 110101001 .  @XO
+DIVDEU          011111 ..... ..... ..... . 110001001 .  @XO
+
+MODSD           011111 ..... ..... ..... 1100001001 -   @X
+MODUD           011111 ..... ..... ..... 0100001001 -   @X
+
 ## Fixed-Point Logical Instructions
 
 CFUGED          011111 ..... ..... ..... 0011011100 -   @X
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index bc25d5b062..585c2b65d3 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -101,7 +101,7 @@ target_ulong helper_DIVWE(CPUPPCState *env, target_ulong ra, target_ulong rb,
 
 #if defined(TARGET_PPC64)
 
-uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
+uint64_t helper_DIVDEU(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
 {
     uint64_t rt = 0;
     int overflow = 0;
@@ -120,7 +120,7 @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
     return rt;
 }
 
-uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
+uint64_t helper_DIVDE(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
 {
     uint64_t rt = 0;
     int64_t ra = (int64_t)rau;
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 8fa125d0ae..8900da85e5 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -1778,21 +1778,11 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret,
         gen_set_Rc0(ctx, ret);
     }
 }
-/* div[wd]eu[o][.] */
-#define GEN_DIVE(name, hlpr, compute_ov)                                      \
-static void gen_##name(DisasContext *ctx)                                     \
-{                                                                             \
-    TCGv_i32 t0 = tcg_constant_i32(compute_ov);                               \
-    gen_helper_##hlpr(cpu_gpr[rD(ctx->opcode)], tcg_env,                      \
-                     cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], t0); \
-    if (unlikely(Rc(ctx->opcode) != 0)) {                                     \
-        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);                           \
-    }                                                                         \
-}
 
 #if defined(TARGET_PPC64)
-static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1,
-                                     TCGv arg2, int sign, int compute_ov)
+static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret,
+                                     TCGv arg1, TCGv arg2, bool sign,
+                                     bool compute_ov, bool compute_rc0)
 {
     TCGv_i64 t0 = tcg_temp_new_i64();
     TCGv_i64 t1 = tcg_temp_new_i64();
@@ -1824,29 +1814,10 @@ static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1,
         tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
     }
 
-    if (unlikely(Rc(ctx->opcode) != 0)) {
+    if (unlikely(compute_rc0)) {
         gen_set_Rc0(ctx, ret);
     }
 }
-
-#define GEN_INT_ARITH_DIVD(name, opc3, sign, compute_ov)                      \
-static void glue(gen_, name)(DisasContext *ctx)                               \
-{                                                                             \
-    gen_op_arith_divd(ctx, cpu_gpr[rD(ctx->opcode)],                          \
-                      cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)],     \
-                      sign, compute_ov);                                      \
-}
-/* divdu  divdu.  divduo  divduo.   */
-GEN_INT_ARITH_DIVD(divdu, 0x0E, 0, 0);
-GEN_INT_ARITH_DIVD(divduo, 0x1E, 0, 1);
-/* divd  divd.  divdo  divdo.   */
-GEN_INT_ARITH_DIVD(divd, 0x0F, 1, 0);
-GEN_INT_ARITH_DIVD(divdo, 0x1F, 1, 1);
-
-GEN_DIVE(divdeu, divdeu, 0);
-GEN_DIVE(divdeuo, divdeu, 1);
-GEN_DIVE(divde, divde, 0);
-GEN_DIVE(divdeo, divde, 1);
 #endif
 
 static inline void gen_op_arith_modw(DisasContext *ctx, TCGv ret, TCGv arg1,
@@ -1905,17 +1876,6 @@ static inline void gen_op_arith_modd(DisasContext *ctx, TCGv ret, TCGv arg1,
         tcg_gen_remu_i64(ret, t0, t1);
     }
 }
-
-#define GEN_INT_ARITH_MODD(name, opc3, sign)                            \
-static void glue(gen_, name)(DisasContext *ctx)                           \
-{                                                                         \
-  gen_op_arith_modd(ctx, cpu_gpr[rD(ctx->opcode)],                        \
-                    cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)],   \
-                    sign);                                                \
-}
-
-GEN_INT_ARITH_MODD(modud, 0x08, 0);
-GEN_INT_ARITH_MODD(modsd, 0x18, 1);
 #endif
 
 /* Common subf function */
@@ -6395,23 +6355,6 @@ GEN_HANDLER(lvsr, 0x1f, 0x06, 0x01, 0x00000001, PPC_ALTIVEC),
 GEN_HANDLER(mfvscr, 0x04, 0x2, 0x18, 0x001ff800, PPC_ALTIVEC),
 GEN_HANDLER(mtvscr, 0x04, 0x2, 0x19, 0x03ff0000, PPC_ALTIVEC),
 
-#if defined(TARGET_PPC64)
-#undef GEN_INT_ARITH_DIVD
-#define GEN_INT_ARITH_DIVD(name, opc3, sign, compute_ov)                      \
-GEN_HANDLER(name, 0x1F, 0x09, opc3, 0x00000000, PPC_64B)
-GEN_INT_ARITH_DIVD(divdu, 0x0E, 0, 0),
-GEN_INT_ARITH_DIVD(divduo, 0x1E, 0, 1),
-GEN_INT_ARITH_DIVD(divd, 0x0F, 1, 0),
-GEN_INT_ARITH_DIVD(divdo, 0x1F, 1, 1),
-
-GEN_HANDLER_E(divdeu, 0x1F, 0x09, 0x0C, 0, PPC_NONE, PPC2_DIVE_ISA206),
-GEN_HANDLER_E(divdeuo, 0x1F, 0x09, 0x1C, 0, PPC_NONE, PPC2_DIVE_ISA206),
-GEN_HANDLER_E(divde, 0x1F, 0x09, 0x0D, 0, PPC_NONE, PPC2_DIVE_ISA206),
-GEN_HANDLER_E(divdeo, 0x1F, 0x09, 0x1D, 0, PPC_NONE, PPC2_DIVE_ISA206),
-GEN_HANDLER_E(modsd, 0x1F, 0x09, 0x18, 0x00000001, PPC_NONE, PPC2_ISA300),
-GEN_HANDLER_E(modud, 0x1F, 0x09, 0x08, 0x00000001, PPC_NONE, PPC2_ISA300),
-#endif
-
 #undef GEN_LOGICAL1
 #undef GEN_LOGICAL2
 #define GEN_LOGICAL2(name, tcg_op, opc, type)                                 \
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc
index 561fb2d94c..d8a0a21ebe 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -473,7 +473,7 @@ static bool do_divw(DisasContext *ctx, arg_XO *a, int sign)
     return true;
 }
 
-static bool do_divwe(DisasContext *ctx, arg_XO *a,
+static bool do_dive(DisasContext *ctx, arg_XO *a,
                      void (*helper)(TCGv, TCGv_ptr, TCGv, TCGv, TCGv_i32))
 {
     REQUIRE_INSNS_FLAGS2(ctx, DIVE_ISA206);
@@ -487,8 +487,8 @@ static bool do_divwe(DisasContext *ctx, arg_XO *a,
 
 TRANS(DIVW, do_divw, 1);
 TRANS(DIVWU, do_divw, 0);
-TRANS(DIVWE, do_divwe, gen_helper_DIVWE);
-TRANS(DIVWEU, do_divwe, gen_helper_DIVWEU);
+TRANS(DIVWE, do_dive, gen_helper_DIVWE);
+TRANS(DIVWEU, do_dive, gen_helper_DIVWEU);
 
 static bool do_modw(DisasContext *ctx, arg_X *a, bool sign)
 {
@@ -619,6 +619,29 @@ static bool trans_MADDHDU(DisasContext *ctx, arg_MADDHDU *a)
     return true;
 }
 
+static bool do_divd(DisasContext *ctx, arg_XO *a, bool sign)
+{
+    gen_op_arith_divd(ctx, cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb],
+                      sign, a->oe, a->rc);
+    return true;
+}
+
+static bool do_modd(DisasContext *ctx, arg_X *a, bool sign)
+{
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+    gen_op_arith_modd(ctx, cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb],
+                      sign);
+    return true;
+}
+
+TRANS64(DIVD, do_divd, true);
+TRANS64(DIVDU, do_divd, false);
+TRANS64(DIVDE, do_dive, gen_helper_DIVDE);
+TRANS64(DIVDEU, do_dive, gen_helper_DIVDEU);
+
+TRANS64(MODSD, do_modd, true);
+TRANS64(MODUD, do_modd, false);
+
 static bool trans_INVALID(DisasContext *ctx, arg_INVALID *a)
 {
     gen_invalid(ctx);
-- 
2.39.3



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 7/8] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree.
  2024-04-16  6:39 [PATCH 0/8] target/ppc: Move fixed-point insns to decodetree Chinmay Rath
                   ` (5 preceding siblings ...)
  2024-04-16  6:39 ` [PATCH 6/8] target/ppc: Move div/mod fixed-point insns (64 bits " Chinmay Rath
@ 2024-04-16  6:39 ` Chinmay Rath
  2024-04-16 19:20   ` Richard Henderson
  2024-04-16  6:39 ` [PATCH 8/8] target/ppc: Move logical fixed-point " Chinmay Rath
  7 siblings, 1 reply; 27+ messages in thread
From: Chinmay Rath @ 2024-04-16  6:39 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel, npiggin, danielhb413, richard.henderson, harshpb

Moving the following instructions to decodetree specification :

	cmp{rb, eqb}, t{w, d}	: X-form
	t{w, d}i		: D-form
	isel			: A-form

The changes were verified by validating that the tcg ops generated by those
instructions remain the same, which were captured using the '-d in_asm,op' flag.

Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
---
 target/ppc/helper.h                        |   6 +-
 target/ppc/insn32.decode                   |  16 +++
 target/ppc/excp_helper.c                   |   4 +-
 target/ppc/int_helper.c                    |   2 +-
 target/ppc/translate.c                     | 133 +--------------------
 target/ppc/translate/fixedpoint-impl.c.inc | 123 +++++++++++++++++++
 6 files changed, 148 insertions(+), 136 deletions(-)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index e862bdceaf..05f7ab5f6e 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -1,8 +1,8 @@
 DEF_HELPER_FLAGS_3(raise_exception_err, TCG_CALL_NO_WG, noreturn, env, i32, i32)
 DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, i32)
-DEF_HELPER_FLAGS_4(tw, TCG_CALL_NO_WG, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(TW, TCG_CALL_NO_WG, void, env, tl, tl, i32)
 #if defined(TARGET_PPC64)
-DEF_HELPER_FLAGS_4(td, TCG_CALL_NO_WG, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(TD, TCG_CALL_NO_WG, void, env, tl, tl, i32)
 #endif
 DEF_HELPER_4(HASHST, void, env, tl, tl, tl)
 DEF_HELPER_4(HASHCHK, void, env, tl, tl, tl)
@@ -67,7 +67,7 @@ DEF_HELPER_FLAGS_2(PEXTD, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_1(CDTBCD, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(CBCDTD, TCG_CALL_NO_RWG_SE, tl, tl)
 #if defined(TARGET_PPC64)
-DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl)
+DEF_HELPER_FLAGS_2(CMPEQB, TCG_CALL_NO_RWG_SE, i32, tl, tl)
 DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_3(srad, tl, env, tl, tl)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 509961023b..80a7bb1872 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -23,6 +23,9 @@
 &A_tb           frt frb rc:bool
 @A_tb           ...... frt:5 ..... frb:5 ..... ..... rc:1       &A_tb
 
+&A_tab_bc       rt ra rb bc
+@A_tab_bc       ...... rt:5 ra:5 rb:5 bc:5 ..... .              &A_tab_bc
+
 &D              rt ra si:int64_t
 @D              ...... rt:5 ra:5 si:s16                         &D
 
@@ -331,6 +334,19 @@ CMP             011111 ... - . ..... ..... 0000000000 - @X_bfl
 CMPL            011111 ... - . ..... ..... 0000100000 - @X_bfl
 CMPI            001011 ... - . ..... ................   @D_bfs
 CMPLI           001010 ... - . ..... ................   @D_bfu
+CMPRB           011111 ... - . ..... ..... 0011000000 - @X_bfl
+CMPEQB          011111 ... -- ..... ..... 0011100000 -  @X_bf
+
+### Fixed-Point Trap Instructions
+
+TW              011111 ..... ..... ..... 0000000100 -   @X
+TD              011111 ..... ..... ..... 0001000100 -   @X
+TWI             000011 ..... ..... ................     @D
+TDI             000010 ..... ..... ................     @D
+
+### Fixed-Point Select Instruction
+
+ISEL            011111 ..... ..... ..... ..... 01111 -  @A_tab_bc
 
 ### Fixed-Point Arithmetic Instructions
 
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 674c05a2ce..79dd9b82cf 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -2750,7 +2750,7 @@ void helper_rfmci(CPUPPCState *env)
 }
 #endif /* !CONFIG_USER_ONLY */
 
-void helper_tw(CPUPPCState *env, target_ulong arg1, target_ulong arg2,
+void helper_TW(CPUPPCState *env, target_ulong arg1, target_ulong arg2,
                uint32_t flags)
 {
     if (!likely(!(((int32_t)arg1 < (int32_t)arg2 && (flags & 0x10)) ||
@@ -2764,7 +2764,7 @@ void helper_tw(CPUPPCState *env, target_ulong arg1, target_ulong arg2,
 }
 
 #ifdef TARGET_PPC64
-void helper_td(CPUPPCState *env, target_ulong arg1, target_ulong arg2,
+void helper_TD(CPUPPCState *env, target_ulong arg1, target_ulong arg2,
                uint32_t flags)
 {
     if (!likely(!(((int64_t)arg1 < (int64_t)arg2 && (flags & 0x10)) ||
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 585c2b65d3..d12dcc28e1 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -159,7 +159,7 @@ uint64_t helper_DIVDE(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
 /* When you XOR the pattern and there is a match, that byte will be zero */
 #define hasvalue(x, n)  (haszero((x) ^ pattern(n)))
 
-uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
+uint32_t helper_CMPEQB(target_ulong ra, target_ulong rb)
 {
     return hasvalue(rb, ra) ? CRF_GT : 0;
 }
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 8900da85e5..98e642b19a 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -1564,66 +1564,6 @@ static inline void gen_set_Rc0(DisasContext *ctx, TCGv reg)
     }
 }
 
-/* cmprb - range comparison: isupper, isaplha, islower*/
-static void gen_cmprb(DisasContext *ctx)
-{
-    TCGv_i32 src1 = tcg_temp_new_i32();
-    TCGv_i32 src2 = tcg_temp_new_i32();
-    TCGv_i32 src2lo = tcg_temp_new_i32();
-    TCGv_i32 src2hi = tcg_temp_new_i32();
-    TCGv_i32 crf = cpu_crf[crfD(ctx->opcode)];
-
-    tcg_gen_trunc_tl_i32(src1, cpu_gpr[rA(ctx->opcode)]);
-    tcg_gen_trunc_tl_i32(src2, cpu_gpr[rB(ctx->opcode)]);
-
-    tcg_gen_andi_i32(src1, src1, 0xFF);
-    tcg_gen_ext8u_i32(src2lo, src2);
-    tcg_gen_shri_i32(src2, src2, 8);
-    tcg_gen_ext8u_i32(src2hi, src2);
-
-    tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1);
-    tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi);
-    tcg_gen_and_i32(crf, src2lo, src2hi);
-
-    if (ctx->opcode & 0x00200000) {
-        tcg_gen_shri_i32(src2, src2, 8);
-        tcg_gen_ext8u_i32(src2lo, src2);
-        tcg_gen_shri_i32(src2, src2, 8);
-        tcg_gen_ext8u_i32(src2hi, src2);
-        tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1);
-        tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi);
-        tcg_gen_and_i32(src2lo, src2lo, src2hi);
-        tcg_gen_or_i32(crf, crf, src2lo);
-    }
-    tcg_gen_shli_i32(crf, crf, CRF_GT_BIT);
-}
-
-#if defined(TARGET_PPC64)
-/* cmpeqb */
-static void gen_cmpeqb(DisasContext *ctx)
-{
-    gen_helper_cmpeqb(cpu_crf[crfD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
-                      cpu_gpr[rB(ctx->opcode)]);
-}
-#endif
-
-/* isel (PowerPC 2.03 specification) */
-static void gen_isel(DisasContext *ctx)
-{
-    uint32_t bi = rC(ctx->opcode);
-    uint32_t mask = 0x08 >> (bi & 0x03);
-    TCGv t0 = tcg_temp_new();
-    TCGv zr;
-
-    tcg_gen_extu_i32_tl(t0, cpu_crf[bi >> 2]);
-    tcg_gen_andi_tl(t0, t0, mask);
-
-    zr = tcg_constant_tl(0);
-    tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr[rD(ctx->opcode)], t0, zr,
-                       rA(ctx->opcode) ? cpu_gpr[rA(ctx->opcode)] : zr,
-                       cpu_gpr[rB(ctx->opcode)]);
-}
-
 /* cmpb: PowerPC 2.05 specification */
 static void gen_cmpb(DisasContext *ctx)
 {
@@ -4259,76 +4199,20 @@ static void gen_scv(DisasContext *ctx)
 /***                                Trap                                   ***/
 
 /* Check for unconditional traps (always or never) */
-static bool check_unconditional_trap(DisasContext *ctx)
+static bool check_unconditional_trap(DisasContext *ctx, int to)
 {
     /* Trap never */
-    if (TO(ctx->opcode) == 0) {
+    if (to == 0) {
         return true;
     }
     /* Trap always */
-    if (TO(ctx->opcode) == 31) {
+    if (to == 31) {
         gen_exception_err(ctx, POWERPC_EXCP_PROGRAM, POWERPC_EXCP_TRAP);
         return true;
     }
     return false;
 }
 
-/* tw */
-static void gen_tw(DisasContext *ctx)
-{
-    TCGv_i32 t0;
-
-    if (check_unconditional_trap(ctx)) {
-        return;
-    }
-    t0 = tcg_constant_i32(TO(ctx->opcode));
-    gen_helper_tw(tcg_env, cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)],
-                  t0);
-}
-
-/* twi */
-static void gen_twi(DisasContext *ctx)
-{
-    TCGv t0;
-    TCGv_i32 t1;
-
-    if (check_unconditional_trap(ctx)) {
-        return;
-    }
-    t0 = tcg_constant_tl(SIMM(ctx->opcode));
-    t1 = tcg_constant_i32(TO(ctx->opcode));
-    gen_helper_tw(tcg_env, cpu_gpr[rA(ctx->opcode)], t0, t1);
-}
-
-#if defined(TARGET_PPC64)
-/* td */
-static void gen_td(DisasContext *ctx)
-{
-    TCGv_i32 t0;
-
-    if (check_unconditional_trap(ctx)) {
-        return;
-    }
-    t0 = tcg_constant_i32(TO(ctx->opcode));
-    gen_helper_td(tcg_env, cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)],
-                  t0);
-}
-
-/* tdi */
-static void gen_tdi(DisasContext *ctx)
-{
-    TCGv t0;
-    TCGv_i32 t1;
-
-    if (check_unconditional_trap(ctx)) {
-        return;
-    }
-    t0 = tcg_constant_tl(SIMM(ctx->opcode));
-    t1 = tcg_constant_i32(TO(ctx->opcode));
-    gen_helper_td(tcg_env, cpu_gpr[rA(ctx->opcode)], t0, t1);
-}
-#endif
-
 /***                          Processor control                            ***/
 
 /* mcrxr */
@@ -6145,12 +6029,7 @@ GEN_HANDLER_E(brw, 0x1F, 0x1B, 0x04, 0x0000F801, PPC_NONE, PPC2_ISA310),
 GEN_HANDLER_E(brh, 0x1F, 0x1B, 0x06, 0x0000F801, PPC_NONE, PPC2_ISA310),
 #endif
 GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0xFFFFFFFF, PPC_NONE),
-#if defined(TARGET_PPC64)
-GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x00600000, PPC_NONE, PPC2_ISA300),
-#endif
 GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x00000001, PPC_NONE, PPC2_ISA205),
-GEN_HANDLER_E(cmprb, 0x1F, 0x00, 0x06, 0x00400001, PPC_NONE, PPC2_ISA300),
-GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x00000001, PPC_ISEL),
 GEN_HANDLER2(andi_, "andi.", 0x1C, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
 GEN_HANDLER2(andis_, "andis.", 0x1D, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
 GEN_HANDLER(cntlzw, 0x1F, 0x1A, 0x00, 0x00000000, PPC_INTEGER),
@@ -6249,12 +6128,6 @@ GEN_HANDLER(hrfid, 0x13, 0x12, 0x08, 0x03FF8001, PPC_64H),
 /* Top bit of opc2 corresponds with low bit of LEV, so use two handlers */
 GEN_HANDLER(sc, 0x11, 0x11, 0xFF, 0x03FFF01D, PPC_FLOW),
 GEN_HANDLER(sc, 0x11, 0x01, 0xFF, 0x03FFF01D, PPC_FLOW),
-GEN_HANDLER(tw, 0x1F, 0x04, 0x00, 0x00000001, PPC_FLOW),
-GEN_HANDLER(twi, 0x03, 0xFF, 0xFF, 0x00000000, PPC_FLOW),
-#if defined(TARGET_PPC64)
-GEN_HANDLER(td, 0x1F, 0x04, 0x02, 0x00000001, PPC_64B),
-GEN_HANDLER(tdi, 0x02, 0xFF, 0xFF, 0x00000000, PPC_64B),
-#endif
 GEN_HANDLER(mcrxr, 0x1F, 0x00, 0x10, 0x007FF801, PPC_MISC),
 GEN_HANDLER(mfcr, 0x1F, 0x13, 0x00, 0x00000801, PPC_MISC),
 GEN_HANDLER(mfmsr, 0x1F, 0x13, 0x02, 0x001FF801, PPC_MISC),
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc
index d8a0a21ebe..b3f071e669 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -289,6 +289,49 @@ TRANS(CMPL, do_cmp_X, false);
 TRANS(CMPI, do_cmp_D, true);
 TRANS(CMPLI, do_cmp_D, false);
 
+static bool trans_CMPRB(DisasContext *ctx, arg_CMPRB *a)
+{
+    TCGv_i32 src1 = tcg_temp_new_i32();
+    TCGv_i32 src2 = tcg_temp_new_i32();
+    TCGv_i32 src2lo = tcg_temp_new_i32();
+    TCGv_i32 src2hi = tcg_temp_new_i32();
+    TCGv_i32 crf = cpu_crf[a->bf];
+
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+    tcg_gen_trunc_tl_i32(src1, cpu_gpr[a->ra]);
+    tcg_gen_trunc_tl_i32(src2, cpu_gpr[a->rb]);
+
+    tcg_gen_andi_i32(src1, src1, 0xFF);
+    tcg_gen_ext8u_i32(src2lo, src2);
+    tcg_gen_shri_i32(src2, src2, 8);
+    tcg_gen_ext8u_i32(src2hi, src2);
+
+    tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1);
+    tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi);
+    tcg_gen_and_i32(crf, src2lo, src2hi);
+
+    if (a->l) {
+        tcg_gen_shri_i32(src2, src2, 8);
+        tcg_gen_ext8u_i32(src2lo, src2);
+        tcg_gen_shri_i32(src2, src2, 8);
+        tcg_gen_ext8u_i32(src2hi, src2);
+        tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1);
+        tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi);
+        tcg_gen_and_i32(src2lo, src2lo, src2hi);
+        tcg_gen_or_i32(crf, crf, src2lo);
+    }
+    tcg_gen_shli_i32(crf, crf, CRF_GT_BIT);
+    return true;
+}
+
+static bool trans_CMPEQB(DisasContext *ctx, arg_CMPEQB *a)
+{
+    REQUIRE_64BIT(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+    gen_helper_CMPEQB(cpu_crf[a->bf], cpu_gpr[a->ra], cpu_gpr[a->rb]);
+    return true;
+}
+
 /*
  * Fixed-Point Arithmetic Instructions
  */
@@ -642,6 +685,86 @@ TRANS64(DIVDEU, do_dive, gen_helper_DIVDEU);
 TRANS64(MODSD, do_modd, true);
 TRANS64(MODUD, do_modd, false);
 
+/*
+ * Fixed-Point Select Instructions
+ */
+
+static bool trans_ISEL(DisasContext *ctx, arg_ISEL *a)
+{
+    REQUIRE_INSNS_FLAGS(ctx, ISEL);
+    uint32_t bi = a->bc;
+    uint32_t mask = 0x08 >> (bi & 0x03);
+    TCGv t0 = tcg_temp_new();
+    TCGv zr;
+
+    tcg_gen_extu_i32_tl(t0, cpu_crf[bi >> 2]);
+    tcg_gen_andi_tl(t0, t0, mask);
+
+    zr = tcg_constant_tl(0);
+    tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr[a->rt], t0, zr,
+                       a->ra ? cpu_gpr[a->ra] : zr,
+                       cpu_gpr[a->rb]);
+    return true;
+}
+
+/*
+ * Fixed-Point Trap Instructions
+ */
+
+static bool trans_TW(DisasContext *ctx, arg_TW *a)
+{
+    TCGv_i32 t0;
+
+    if (check_unconditional_trap(ctx, a->rt)) {
+        return true;
+    }
+    t0 = tcg_constant_i32(a->rt);
+    gen_helper_TW(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0);
+    return true;
+}
+
+static bool trans_TWI(DisasContext *ctx, arg_TWI *a)
+{
+    TCGv t0;
+    TCGv_i32 t1;
+
+    if (check_unconditional_trap(ctx, a->rt)) {
+        return true;
+    }
+    t0 = tcg_constant_tl(a->si);
+    t1 = tcg_constant_i32(a->rt);
+    gen_helper_TW(tcg_env, cpu_gpr[a->ra], t0, t1);
+    return true;
+}
+
+static bool trans_TD(DisasContext *ctx, arg_TD *a)
+{
+    TCGv_i32 t0;
+
+    REQUIRE_64BIT(ctx);
+    if (check_unconditional_trap(ctx, a->rt)) {
+        return true;
+    }
+    t0 = tcg_constant_i32(a->rt);
+    gen_helper_TD(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0);
+    return true;
+}
+
+static bool trans_TDI(DisasContext *ctx, arg_TDI *a)
+{
+    TCGv t0;
+    TCGv_i32 t1;
+
+    REQUIRE_64BIT(ctx);
+    if (check_unconditional_trap(ctx, a->rt)) {
+        return true;
+    }
+    t0 = tcg_constant_tl(a->si);
+    t1 = tcg_constant_i32(a->rt);
+    gen_helper_TD(tcg_env, cpu_gpr[a->ra], t0, t1);
+    return true;
+}
+
 static bool trans_INVALID(DisasContext *ctx, arg_INVALID *a)
 {
     gen_invalid(ctx);
-- 
2.39.3



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 8/8] target/ppc: Move logical fixed-point instructions to decodetree.
  2024-04-16  6:39 [PATCH 0/8] target/ppc: Move fixed-point insns to decodetree Chinmay Rath
                   ` (6 preceding siblings ...)
  2024-04-16  6:39 ` [PATCH 7/8] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions " Chinmay Rath
@ 2024-04-16  6:39 ` Chinmay Rath
  2024-04-16 19:35   ` Richard Henderson
  7 siblings, 1 reply; 27+ messages in thread
From: Chinmay Rath @ 2024-04-16  6:39 UTC (permalink / raw)
  To: qemu-ppc; +Cc: qemu-devel, npiggin, danielhb413, richard.henderson, harshpb

Move the following instructions to the decodetree specification:

	andi[s]., {ori, xori}[s]			: D-form

	{and, andc, nand, or, orc, nor, xor, eqv}[.],
	exts{b, h, w}[.],  cnt{l, t}z{w, d}[.],
	popcnt{b, w, d},  prty{w, d}, cmpb, bpermd	: X-form

With this patch, all the fixed-point logical instructions have been
moved to decodetree.
The changes were verified by checking that the TCG ops generated for these
instructions, as captured with the '-d in_asm,op' flag, remain the same.

Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
---
 target/ppc/helper.h                        |   8 +-
 target/ppc/insn32.decode                   |  38 +++
 target/ppc/int_helper.c                    |  10 +-
 target/ppc/translate.c                     | 359 ---------------------
 target/ppc/translate/fixedpoint-impl.c.inc | 269 +++++++++++++++
 5 files changed, 316 insertions(+), 368 deletions(-)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 05f7ab5f6e..b53abd853a 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -58,8 +58,8 @@ DEF_HELPER_4(DIVDE, i64, env, i64, i64, i32)
 DEF_HELPER_4(DIVWEU, tl, env, tl, tl, i32)
 DEF_HELPER_4(DIVWE, tl, env, tl, tl, i32)
 
-DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl)
-DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_1(POPCNTB, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_2(CMPB, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 DEF_HELPER_3(sraw, tl, env, tl, tl)
 DEF_HELPER_FLAGS_2(CFUGED, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(PDEPD, TCG_CALL_NO_RWG_SE, i64, i64, i64)
@@ -68,8 +68,8 @@ DEF_HELPER_FLAGS_1(CDTBCD, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(CBCDTD, TCG_CALL_NO_RWG_SE, tl, tl)
 #if defined(TARGET_PPC64)
 DEF_HELPER_FLAGS_2(CMPEQB, TCG_CALL_NO_RWG_SE, i32, tl, tl)
-DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl)
-DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_1(POPCNTW, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_2(BPERMD, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_3(srad, tl, env, tl, tl)
 DEF_HELPER_FLAGS_0(DARN32, TCG_CALL_NO_RWG, tl)
 DEF_HELPER_FLAGS_0(DARN64, TCG_CALL_NO_RWG, tl)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 80a7bb1872..3175810190 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -29,6 +29,9 @@
 &D              rt ra si:int64_t
 @D              ...... rt:5 ra:5 si:s16                         &D
 
+&D_ui           rt ra ui:uint64_t
+@D_ui           ...... rt:5 ra:5 ui:16                         &D_ui
+
 &D_bf           bf l:bool ra imm
 @D_bfs          ...... bf:3 . l:1 ra:5 imm:s16                  &D_bf
 @D_bfu          ...... bf:3 . l:1 ra:5 imm:16                   &D_bf
@@ -96,6 +99,9 @@
 &X_sa           rs ra
 @X_sa           ...... rs:5 ra:5 ..... .......... .             &X_sa
 
+&X_sa_rc        rs ra rc
+@X_sa_rc        ...... rs:5 ra:5 ..... .......... rc:1          &X_sa_rc
+
 %x_frtp         22:4 !function=times_2
 %x_frap         17:4 !function=times_2
 %x_frbp         12:4 !function=times_2
@@ -410,6 +416,38 @@ MODUD           011111 ..... ..... ..... 0100001001 -   @X
 
 ## Fixed-Point Logical Instructions
 
+ANDI_           011100 ..... ..... ................     @D_ui
+ANDIS_          011101 ..... ..... ................     @D_ui
+ORI             011000 ..... ..... ................     @D_ui
+ORIS            011001 ..... ..... ................     @D_ui
+XORI            011010 ..... ..... ................     @D_ui
+XORIS           011011 ..... ..... ................     @D_ui
+
+AND             011111 ..... ..... ..... 0000011100 .   @X_rc
+ANDC            011111 ..... ..... ..... 0000111100 .   @X_rc
+NAND            011111 ..... ..... ..... 0111011100 .   @X_rc
+OR              011111 ..... ..... ..... 0110111100 .   @X_rc
+ORC             011111 ..... ..... ..... 0110011100 .   @X_rc
+NOR             011111 ..... ..... ..... 0001111100 .   @X_rc
+XOR             011111 ..... ..... ..... 0100111100 .   @X_rc
+EQV             011111 ..... ..... ..... 0100011100 .   @X_rc
+CMPB            011111 ..... ..... ..... 0111111100 .   @X_rc
+
+EXTSB           011111 ..... ..... ----- 1110111010 .   @X_sa_rc
+EXTSH           011111 ..... ..... ----- 1110011010 .   @X_sa_rc
+EXTSW           011111 ..... ..... ----- 1111011010 .   @X_sa_rc
+CNTLZW          011111 ..... ..... ----- 0000011010 .   @X_sa_rc
+CNTTZW          011111 ..... ..... ----- 1000011010 .   @X_sa_rc
+CNTLZD          011111 ..... ..... ----- 0000111010 .   @X_sa_rc
+CNTTZD          011111 ..... ..... ----- 1000111010 .   @X_sa_rc
+POPCNTB         011111 ..... ..... ----- 0001111010 .   @X_sa_rc
+
+POPCNTW         011111 ..... ..... ----- 0101111010 -   @X_sa
+POPCNTD         011111 ..... ..... ----- 0111111010 -   @X_sa
+PRTYW           011111 ..... ..... ----- 0010011010 -   @X_sa
+PRTYD           011111 ..... ..... ----- 0010111010 -   @X_sa
+
+BPERMD          011111 ..... ..... ..... 0011111100 -   @X
 CFUGED          011111 ..... ..... ..... 0011011100 -   @X
 CNTLZDM         011111 ..... ..... ..... 0000111011 -   @X
 CNTTZDM         011111 ..... ..... ..... 1000111011 -   @X
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index d12dcc28e1..2c6b633d65 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -201,7 +201,7 @@ uint64_t helper_DARN64(void)
     return ret;
 }
 
-uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
+uint64_t helper_BPERMD(uint64_t rs, uint64_t rb)
 {
     int i;
     uint64_t ra = 0;
@@ -219,7 +219,7 @@ uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
 
 #endif
 
-target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
+target_ulong helper_CMPB(target_ulong rs, target_ulong rb)
 {
     target_ulong mask = 0xff;
     target_ulong ra = 0;
@@ -288,7 +288,7 @@ target_ulong helper_srad(CPUPPCState *env, target_ulong value,
 #endif
 
 #if defined(TARGET_PPC64)
-target_ulong helper_popcntb(target_ulong val)
+target_ulong helper_POPCNTB(target_ulong val)
 {
     /* Note that we don't fold past bytes */
     val = (val & 0x5555555555555555ULL) + ((val >>  1) &
@@ -300,7 +300,7 @@ target_ulong helper_popcntb(target_ulong val)
     return val;
 }
 
-target_ulong helper_popcntw(target_ulong val)
+target_ulong helper_POPCNTW(target_ulong val)
 {
     /* Note that we don't fold past words.  */
     val = (val & 0x5555555555555555ULL) + ((val >>  1) &
@@ -316,7 +316,7 @@ target_ulong helper_popcntw(target_ulong val)
     return val;
 }
 #else
-target_ulong helper_popcntb(target_ulong val)
+target_ulong helper_POPCNTB(target_ulong val)
 {
     /* Note that we don't fold past bytes */
     val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 98e642b19a..a246bcb962 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -1564,13 +1564,6 @@ static inline void gen_set_Rc0(DisasContext *ctx, TCGv reg)
     }
 }
 
-/* cmpb: PowerPC 2.05 specification */
-static void gen_cmpb(DisasContext *ctx)
-{
-    gen_helper_cmpb(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)],
-                    cpu_gpr[rB(ctx->opcode)]);
-}
-
 /***                           Integer arithmetic                          ***/
 
 static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
@@ -1889,82 +1882,6 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1,
 }
 
 /***                            Integer logical                            ***/
-#define GEN_LOGICAL2(name, tcg_op, opc, type)                                 \
-static void glue(gen_, name)(DisasContext *ctx)                               \
-{                                                                             \
-    tcg_op(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)],                \
-       cpu_gpr[rB(ctx->opcode)]);                                             \
-    if (unlikely(Rc(ctx->opcode) != 0))                                       \
-        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);                           \
-}
-
-#define GEN_LOGICAL1(name, tcg_op, opc, type)                                 \
-static void glue(gen_, name)(DisasContext *ctx)                               \
-{                                                                             \
-    tcg_op(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);               \
-    if (unlikely(Rc(ctx->opcode) != 0))                                       \
-        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);                           \
-}
-
-/* and & and. */
-GEN_LOGICAL2(and, tcg_gen_and_tl, 0x00, PPC_INTEGER);
-/* andc & andc. */
-GEN_LOGICAL2(andc, tcg_gen_andc_tl, 0x01, PPC_INTEGER);
-
-/* andi. */
-static void gen_andi_(DisasContext *ctx)
-{
-    tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)],
-                    UIMM(ctx->opcode));
-    gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
-}
-
-/* andis. */
-static void gen_andis_(DisasContext *ctx)
-{
-    tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)],
-                    UIMM(ctx->opcode) << 16);
-    gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
-}
-
-/* cntlzw */
-static void gen_cntlzw(DisasContext *ctx)
-{
-    TCGv_i32 t = tcg_temp_new_i32();
-
-    tcg_gen_trunc_tl_i32(t, cpu_gpr[rS(ctx->opcode)]);
-    tcg_gen_clzi_i32(t, t, 32);
-    tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t);
-
-    if (unlikely(Rc(ctx->opcode) != 0)) {
-        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
-    }
-}
-
-/* cnttzw */
-static void gen_cnttzw(DisasContext *ctx)
-{
-    TCGv_i32 t = tcg_temp_new_i32();
-
-    tcg_gen_trunc_tl_i32(t, cpu_gpr[rS(ctx->opcode)]);
-    tcg_gen_ctzi_i32(t, t, 32);
-    tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t);
-
-    if (unlikely(Rc(ctx->opcode) != 0)) {
-        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
-    }
-}
-
-/* eqv & eqv. */
-GEN_LOGICAL2(eqv, tcg_gen_eqv_tl, 0x08, PPC_INTEGER);
-/* extsb & extsb. */
-GEN_LOGICAL1(extsb, tcg_gen_ext8s_tl, 0x1D, PPC_INTEGER);
-/* extsh & extsh. */
-GEN_LOGICAL1(extsh, tcg_gen_ext16s_tl, 0x1C, PPC_INTEGER);
-/* nand & nand. */
-GEN_LOGICAL2(nand, tcg_gen_nand_tl, 0x0E, PPC_INTEGER);
-/* nor & nor. */
-GEN_LOGICAL2(nor, tcg_gen_nor_tl, 0x03, PPC_INTEGER);
 
 #if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
 static void gen_pause(DisasContext *ctx)
@@ -1978,243 +1895,6 @@ static void gen_pause(DisasContext *ctx)
 }
 #endif /* defined(TARGET_PPC64) */
 
-/* or & or. */
-static void gen_or(DisasContext *ctx)
-{
-    int rs, ra, rb;
-
-    rs = rS(ctx->opcode);
-    ra = rA(ctx->opcode);
-    rb = rB(ctx->opcode);
-    /* Optimisation for mr. ri case */
-    if (rs != ra || rs != rb) {
-        if (rs != rb) {
-            tcg_gen_or_tl(cpu_gpr[ra], cpu_gpr[rs], cpu_gpr[rb]);
-        } else {
-            tcg_gen_mov_tl(cpu_gpr[ra], cpu_gpr[rs]);
-        }
-        if (unlikely(Rc(ctx->opcode) != 0)) {
-            gen_set_Rc0(ctx, cpu_gpr[ra]);
-        }
-    } else if (unlikely(Rc(ctx->opcode) != 0)) {
-        gen_set_Rc0(ctx, cpu_gpr[rs]);
-#if defined(TARGET_PPC64)
-    } else if (rs != 0) { /* 0 is nop */
-        int prio = 0;
-
-        switch (rs) {
-        case 1:
-            /* Set process priority to low */
-            prio = 2;
-            break;
-        case 6:
-            /* Set process priority to medium-low */
-            prio = 3;
-            break;
-        case 2:
-            /* Set process priority to normal */
-            prio = 4;
-            break;
-#if !defined(CONFIG_USER_ONLY)
-        case 31:
-            if (!ctx->pr) {
-                /* Set process priority to very low */
-                prio = 1;
-            }
-            break;
-        case 5:
-            if (!ctx->pr) {
-                /* Set process priority to medium-hight */
-                prio = 5;
-            }
-            break;
-        case 3:
-            if (!ctx->pr) {
-                /* Set process priority to high */
-                prio = 6;
-            }
-            break;
-        case 7:
-            if (ctx->hv && !ctx->pr) {
-                /* Set process priority to very high */
-                prio = 7;
-            }
-            break;
-#endif
-        default:
-            break;
-        }
-        if (prio) {
-            TCGv t0 = tcg_temp_new();
-            gen_load_spr(t0, SPR_PPR);
-            tcg_gen_andi_tl(t0, t0, ~0x001C000000000000ULL);
-            tcg_gen_ori_tl(t0, t0, ((uint64_t)prio) << 50);
-            gen_store_spr(SPR_PPR, t0);
-        }
-#if !defined(CONFIG_USER_ONLY)
-        /*
-         * Pause out of TCG otherwise spin loops with smt_low eat too
-         * much CPU and the kernel hangs.  This applies to all
-         * encodings other than no-op, e.g., miso(rs=26), yield(27),
-         * mdoio(29), mdoom(30), and all currently undefined.
-         */
-        gen_pause(ctx);
-#endif
-#endif
-    }
-}
-/* orc & orc. */
-GEN_LOGICAL2(orc, tcg_gen_orc_tl, 0x0C, PPC_INTEGER);
-
-/* xor & xor. */
-static void gen_xor(DisasContext *ctx)
-{
-    /* Optimisation for "set to zero" case */
-    if (rS(ctx->opcode) != rB(ctx->opcode)) {
-        tcg_gen_xor_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)],
-                       cpu_gpr[rB(ctx->opcode)]);
-    } else {
-        tcg_gen_movi_tl(cpu_gpr[rA(ctx->opcode)], 0);
-    }
-    if (unlikely(Rc(ctx->opcode) != 0)) {
-        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
-    }
-}
-
-/* ori */
-static void gen_ori(DisasContext *ctx)
-{
-    target_ulong uimm = UIMM(ctx->opcode);
-
-    if (rS(ctx->opcode) == rA(ctx->opcode) && uimm == 0) {
-        return;
-    }
-    tcg_gen_ori_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], uimm);
-}
-
-/* oris */
-static void gen_oris(DisasContext *ctx)
-{
-    target_ulong uimm = UIMM(ctx->opcode);
-
-    if (rS(ctx->opcode) == rA(ctx->opcode) && uimm == 0) {
-        /* NOP */
-        return;
-    }
-    tcg_gen_ori_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)],
-                   uimm << 16);
-}
-
-/* xori */
-static void gen_xori(DisasContext *ctx)
-{
-    target_ulong uimm = UIMM(ctx->opcode);
-
-    if (rS(ctx->opcode) == rA(ctx->opcode) && uimm == 0) {
-        /* NOP */
-        return;
-    }
-    tcg_gen_xori_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], uimm);
-}
-
-/* xoris */
-static void gen_xoris(DisasContext *ctx)
-{
-    target_ulong uimm = UIMM(ctx->opcode);
-
-    if (rS(ctx->opcode) == rA(ctx->opcode) && uimm == 0) {
-        /* NOP */
-        return;
-    }
-    tcg_gen_xori_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)],
-                    uimm << 16);
-}
-
-/* popcntb : PowerPC 2.03 specification */
-static void gen_popcntb(DisasContext *ctx)
-{
-    gen_helper_popcntb(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
-}
-
-static void gen_popcntw(DisasContext *ctx)
-{
-#if defined(TARGET_PPC64)
-    gen_helper_popcntw(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
-#else
-    tcg_gen_ctpop_i32(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
-#endif
-}
-
-#if defined(TARGET_PPC64)
-/* popcntd: PowerPC 2.06 specification */
-static void gen_popcntd(DisasContext *ctx)
-{
-    tcg_gen_ctpop_i64(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
-}
-#endif
-
-/* prtyw: PowerPC 2.05 specification */
-static void gen_prtyw(DisasContext *ctx)
-{
-    TCGv ra = cpu_gpr[rA(ctx->opcode)];
-    TCGv rs = cpu_gpr[rS(ctx->opcode)];
-    TCGv t0 = tcg_temp_new();
-    tcg_gen_shri_tl(t0, rs, 16);
-    tcg_gen_xor_tl(ra, rs, t0);
-    tcg_gen_shri_tl(t0, ra, 8);
-    tcg_gen_xor_tl(ra, ra, t0);
-    tcg_gen_andi_tl(ra, ra, (target_ulong)0x100000001ULL);
-}
-
-#if defined(TARGET_PPC64)
-/* prtyd: PowerPC 2.05 specification */
-static void gen_prtyd(DisasContext *ctx)
-{
-    TCGv ra = cpu_gpr[rA(ctx->opcode)];
-    TCGv rs = cpu_gpr[rS(ctx->opcode)];
-    TCGv t0 = tcg_temp_new();
-    tcg_gen_shri_tl(t0, rs, 32);
-    tcg_gen_xor_tl(ra, rs, t0);
-    tcg_gen_shri_tl(t0, ra, 16);
-    tcg_gen_xor_tl(ra, ra, t0);
-    tcg_gen_shri_tl(t0, ra, 8);
-    tcg_gen_xor_tl(ra, ra, t0);
-    tcg_gen_andi_tl(ra, ra, 1);
-}
-#endif
-
-#if defined(TARGET_PPC64)
-/* bpermd */
-static void gen_bpermd(DisasContext *ctx)
-{
-    gen_helper_bpermd(cpu_gpr[rA(ctx->opcode)],
-                      cpu_gpr[rS(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
-}
-#endif
-
-#if defined(TARGET_PPC64)
-/* extsw & extsw. */
-GEN_LOGICAL1(extsw, tcg_gen_ext32s_tl, 0x1E, PPC_64B);
-
-/* cntlzd */
-static void gen_cntlzd(DisasContext *ctx)
-{
-    tcg_gen_clzi_i64(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], 64);
-    if (unlikely(Rc(ctx->opcode) != 0)) {
-        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
-    }
-}
-
-/* cnttzd */
-static void gen_cnttzd(DisasContext *ctx)
-{
-    tcg_gen_ctzi_i64(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], 64);
-    if (unlikely(Rc(ctx->opcode) != 0)) {
-        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
-    }
-}
-#endif
-
 /***                             Integer rotate                            ***/
 
 /* rlwimi & rlwimi. */
@@ -6029,30 +5709,9 @@ GEN_HANDLER_E(brw, 0x1F, 0x1B, 0x04, 0x0000F801, PPC_NONE, PPC2_ISA310),
 GEN_HANDLER_E(brh, 0x1F, 0x1B, 0x06, 0x0000F801, PPC_NONE, PPC2_ISA310),
 #endif
 GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0xFFFFFFFF, PPC_NONE),
-GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x00000001, PPC_NONE, PPC2_ISA205),
-GEN_HANDLER2(andi_, "andi.", 0x1C, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
-GEN_HANDLER2(andis_, "andis.", 0x1D, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
-GEN_HANDLER(cntlzw, 0x1F, 0x1A, 0x00, 0x00000000, PPC_INTEGER),
-GEN_HANDLER_E(cnttzw, 0x1F, 0x1A, 0x10, 0x00000000, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(copy, 0x1F, 0x06, 0x18, 0x03C00001, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(cp_abort, 0x1F, 0x06, 0x1A, 0x03FFF801, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(paste, 0x1F, 0x06, 0x1C, 0x03C00000, PPC_NONE, PPC2_ISA300),
-GEN_HANDLER(or, 0x1F, 0x1C, 0x0D, 0x00000000, PPC_INTEGER),
-GEN_HANDLER(xor, 0x1F, 0x1C, 0x09, 0x00000000, PPC_INTEGER),
-GEN_HANDLER(ori, 0x18, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
-GEN_HANDLER(oris, 0x19, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
-GEN_HANDLER(xori, 0x1A, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
-GEN_HANDLER(xoris, 0x1B, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
-GEN_HANDLER(popcntb, 0x1F, 0x1A, 0x03, 0x0000F801, PPC_POPCNTB),
-GEN_HANDLER(popcntw, 0x1F, 0x1A, 0x0b, 0x0000F801, PPC_POPCNTWD),
-GEN_HANDLER_E(prtyw, 0x1F, 0x1A, 0x04, 0x0000F801, PPC_NONE, PPC2_ISA205),
-#if defined(TARGET_PPC64)
-GEN_HANDLER(popcntd, 0x1F, 0x1A, 0x0F, 0x0000F801, PPC_POPCNTWD),
-GEN_HANDLER(cntlzd, 0x1F, 0x1A, 0x01, 0x00000000, PPC_64B),
-GEN_HANDLER_E(cnttzd, 0x1F, 0x1A, 0x11, 0x00000000, PPC_NONE, PPC2_ISA300),
-GEN_HANDLER_E(prtyd, 0x1F, 0x1A, 0x05, 0x0000F801, PPC_NONE, PPC2_ISA205),
-GEN_HANDLER_E(bpermd, 0x1F, 0x1C, 0x07, 0x00000001, PPC_NONE, PPC2_PERM_ISA206),
-#endif
 GEN_HANDLER(rlwimi, 0x14, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
 GEN_HANDLER(rlwinm, 0x15, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
 GEN_HANDLER(rlwnm, 0x17, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
@@ -6228,24 +5887,6 @@ GEN_HANDLER(lvsr, 0x1f, 0x06, 0x01, 0x00000001, PPC_ALTIVEC),
 GEN_HANDLER(mfvscr, 0x04, 0x2, 0x18, 0x001ff800, PPC_ALTIVEC),
 GEN_HANDLER(mtvscr, 0x04, 0x2, 0x19, 0x03ff0000, PPC_ALTIVEC),
 
-#undef GEN_LOGICAL1
-#undef GEN_LOGICAL2
-#define GEN_LOGICAL2(name, tcg_op, opc, type)                                 \
-GEN_HANDLER(name, 0x1F, 0x1C, opc, 0x00000000, type)
-#define GEN_LOGICAL1(name, tcg_op, opc, type)                                 \
-GEN_HANDLER(name, 0x1F, 0x1A, opc, 0x00000000, type)
-GEN_LOGICAL2(and, tcg_gen_and_tl, 0x00, PPC_INTEGER),
-GEN_LOGICAL2(andc, tcg_gen_andc_tl, 0x01, PPC_INTEGER),
-GEN_LOGICAL2(eqv, tcg_gen_eqv_tl, 0x08, PPC_INTEGER),
-GEN_LOGICAL1(extsb, tcg_gen_ext8s_tl, 0x1D, PPC_INTEGER),
-GEN_LOGICAL1(extsh, tcg_gen_ext16s_tl, 0x1C, PPC_INTEGER),
-GEN_LOGICAL2(nand, tcg_gen_nand_tl, 0x0E, PPC_INTEGER),
-GEN_LOGICAL2(nor, tcg_gen_nor_tl, 0x03, PPC_INTEGER),
-GEN_LOGICAL2(orc, tcg_gen_orc_tl, 0x0C, PPC_INTEGER),
-#if defined(TARGET_PPC64)
-GEN_LOGICAL1(extsw, tcg_gen_ext32s_tl, 0x1E, PPC_64B),
-#endif
-
 #if defined(TARGET_PPC64)
 #undef GEN_PPC64_R2
 #undef GEN_PPC64_R4
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc
index b3f071e669..ddc8993def 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -799,6 +799,252 @@ TRANS(SETBCR, do_set_bool_cond, false, true)
 TRANS(SETNBC, do_set_bool_cond, true, false)
 TRANS(SETNBCR, do_set_bool_cond, true, true)
 
+/*
+ * Fixed-Point Logical Instructions
+ */
+
+static bool do_addi_(DisasContext *ctx, arg_D_ui *a, bool shift)
+{
+    tcg_gen_andi_tl(cpu_gpr[a->ra], cpu_gpr[a->rt], shift ? a->ui << 16 : a->ui);
+    gen_set_Rc0(ctx, cpu_gpr[a->ra]);
+    return true;
+}
+
+static bool do_ori(DisasContext *ctx, arg_D_ui *a, bool shift)
+{
+    if (a->rt == a->ra && a->ui == 0) {
+        /* NOP */
+        return true;
+    }
+    tcg_gen_ori_tl(cpu_gpr[a->ra], cpu_gpr[a->rt], shift ? a->ui << 16 : a->ui);
+    return true;
+}
+
+static bool do_xori(DisasContext *ctx, arg_D_ui *a, bool shift)
+{
+    if (a->rt == a->ra && a->ui == 0) {
+        /* NOP */
+        return true;
+    }
+    tcg_gen_xori_tl(cpu_gpr[a->ra], cpu_gpr[a->rt], shift ? a->ui << 16 : a->ui);
+    return true;
+}
+
+static bool do_logical1(DisasContext *ctx, arg_X_sa_rc *a,
+                        void (*helper)(TCGv, TCGv))
+{
+    helper(cpu_gpr[a->ra], cpu_gpr[a->rs]);
+    if (unlikely(a->rc)) {
+        gen_set_Rc0(ctx, cpu_gpr[a->ra]);
+    }
+    return true;
+}
+
+static bool do_logical2(DisasContext *ctx, arg_X_rc *a,
+                        void (*helper)(TCGv, TCGv, TCGv))
+{
+    helper(cpu_gpr[a->ra], cpu_gpr[a->rt], cpu_gpr[a->rb]);
+    if (unlikely(a->rc)) {
+        gen_set_Rc0(ctx, cpu_gpr[a->ra]);
+    }
+    return true;
+}
+
+static bool trans_OR(DisasContext *ctx, arg_OR *a)
+{
+    /* Optimisation for mr. ri case */
+    if (a->rt != a->ra || a->rt != a->rb) {
+        if (a->rt != a->rb) {
+            tcg_gen_or_tl(cpu_gpr[a->ra], cpu_gpr[a->rt], cpu_gpr[a->rb]);
+        } else {
+            tcg_gen_mov_tl(cpu_gpr[a->ra], cpu_gpr[a->rt]);
+        }
+        if (unlikely(a->rc)) {
+            gen_set_Rc0(ctx, cpu_gpr[a->ra]);
+        }
+    } else if (unlikely(a->rc)) {
+        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
+#if defined(TARGET_PPC64)
+    } else if (a->rt != 0) { /* 0 is nop */
+        int prio = 0;
+
+        switch (a->rt) {
+        case 1:
+            /* Set process priority to low */
+            prio = 2;
+            break;
+        case 6:
+            /* Set process priority to medium-low */
+            prio = 3;
+            break;
+        case 2:
+            /* Set process priority to normal */
+            prio = 4;
+            break;
+#if !defined(CONFIG_USER_ONLY)
+        case 31:
+            if (!ctx->pr) {
+                /* Set process priority to very low */
+                prio = 1;
+            }
+            break;
+        case 5:
+            if (!ctx->pr) {
+                /* Set process priority to medium-high */
+                prio = 5;
+            }
+            break;
+        case 3:
+            if (!ctx->pr) {
+                /* Set process priority to high */
+                prio = 6;
+            }
+            break;
+        case 7:
+            if (ctx->hv && !ctx->pr) {
+                /* Set process priority to very high */
+                prio = 7;
+            }
+            break;
+#endif
+        default:
+            break;
+        }
+        if (prio) {
+            TCGv t0 = tcg_temp_new();
+            gen_load_spr(t0, SPR_PPR);
+            tcg_gen_andi_tl(t0, t0, ~0x001C000000000000ULL);
+            tcg_gen_ori_tl(t0, t0, ((uint64_t)prio) << 50);
+            gen_store_spr(SPR_PPR, t0);
+        }
+#if !defined(CONFIG_USER_ONLY)
+        /*
+         * Pause out of TCG otherwise spin loops with smt_low eat too
+         * much CPU and the kernel hangs.  This applies to all
+         * encodings other than no-op, e.g., miso(rs=26), yield(27),
+         * mdoio(29), mdoom(30), and all currently undefined.
+         */
+        gen_pause(ctx);
+#endif
+#endif
+    }
+
+    return true;
+}
+
+static bool trans_XOR(DisasContext *ctx, arg_XOR *a)
+{
+    /* Optimisation for "set to zero" case */
+    if (a->rt != a->rb) {
+        tcg_gen_xor_tl(cpu_gpr[a->ra], cpu_gpr[a->rt], cpu_gpr[a->rb]);
+    } else {
+        tcg_gen_movi_tl(cpu_gpr[a->ra], 0);
+    }
+    if (unlikely(a->rc)) {
+        gen_set_Rc0(ctx, cpu_gpr[a->ra]);
+    }
+    return true;
+}
+
+static bool trans_CMPB(DisasContext *ctx, arg_CMPB *a)
+{
+    REQUIRE_INSNS_FLAGS2(ctx, ISA205);
+    gen_helper_CMPB(cpu_gpr[a->ra], cpu_gpr[a->rt], cpu_gpr[a->rb]);
+    return true;
+}
+
+static bool do_cntzw(DisasContext *ctx, arg_X_sa_rc *a,
+                    void (*helper)(TCGv_i32, TCGv_i32, uint32_t))
+{
+    TCGv_i32 t = tcg_temp_new_i32();
+
+    tcg_gen_trunc_tl_i32(t, cpu_gpr[a->rs]);
+    helper(t, t, 32);
+    tcg_gen_extu_i32_tl(cpu_gpr[a->ra], t);
+
+    if (unlikely(a->rc)) {
+        gen_set_Rc0(ctx, cpu_gpr[a->ra]);
+    }
+    return true;
+}
+
+static bool do_cntzd(DisasContext *ctx, arg_X_sa_rc *a,
+                    void (*helper)(TCGv_i64, TCGv_i64, uint64_t))
+{
+    helper(cpu_gpr[a->ra], cpu_gpr[a->rs], 64);
+    if (unlikely(a->rc)) {
+        gen_set_Rc0(ctx, cpu_gpr[a->ra]);
+    }
+    return true;
+}
+
+static bool trans_POPCNTB(DisasContext *ctx, arg_POPCNTB *a)
+{
+    REQUIRE_INSNS_FLAGS(ctx, POPCNTB);
+    gen_helper_POPCNTB(cpu_gpr[a->ra], cpu_gpr[a->rs]);
+    return true;
+}
+
+static bool trans_POPCNTW(DisasContext *ctx, arg_POPCNTW *a)
+{
+    REQUIRE_INSNS_FLAGS(ctx, POPCNTWD);
+#if defined(TARGET_PPC64)
+    gen_helper_POPCNTW(cpu_gpr[a->ra], cpu_gpr[a->rs]);
+#else
+    tcg_gen_ctpop_i32(cpu_gpr[a->ra], cpu_gpr[a->rs]);
+#endif
+    return true;
+}
+
+static bool trans_POPCNTD(DisasContext *ctx, arg_POPCNTD *a)
+{
+    REQUIRE_64BIT(ctx);
+    REQUIRE_INSNS_FLAGS(ctx, POPCNTWD);
+    tcg_gen_ctpop_i64(cpu_gpr[a->ra], cpu_gpr[a->rs]);
+    return true;
+}
+
+static bool trans_PRTYW(DisasContext *ctx, arg_PRTYW *a)
+{
+    TCGv ra = cpu_gpr[a->ra];
+    TCGv rs = cpu_gpr[a->rs];
+    TCGv t0 = tcg_temp_new();
+
+    REQUIRE_INSNS_FLAGS2(ctx, ISA205);
+    tcg_gen_shri_tl(t0, rs, 16);
+    tcg_gen_xor_tl(ra, rs, t0);
+    tcg_gen_shri_tl(t0, ra, 8);
+    tcg_gen_xor_tl(ra, ra, t0);
+    tcg_gen_andi_tl(ra, ra, (target_ulong)0x100000001ULL);
+    return true;
+}
+
+static bool trans_PRTYD(DisasContext *ctx, arg_PRTYD *a)
+{
+    TCGv ra = cpu_gpr[a->ra];
+    TCGv rs = cpu_gpr[a->rs];
+    TCGv t0 = tcg_temp_new();
+
+    REQUIRE_64BIT(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, ISA205);
+    tcg_gen_shri_tl(t0, rs, 32);
+    tcg_gen_xor_tl(ra, rs, t0);
+    tcg_gen_shri_tl(t0, ra, 16);
+    tcg_gen_xor_tl(ra, ra, t0);
+    tcg_gen_shri_tl(t0, ra, 8);
+    tcg_gen_xor_tl(ra, ra, t0);
+    tcg_gen_andi_tl(ra, ra, 1);
+    return true;
+}
+
+static bool trans_BPERMD(DisasContext *ctx, arg_BPERMD *a)
+{
+    REQUIRE_64BIT(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, PERM_ISA206);
+    gen_helper_BPERMD(cpu_gpr[a->ra], cpu_gpr[a->rt], cpu_gpr[a->rb]);
+    return true;
+}
+
 static bool trans_CFUGED(DisasContext *ctx, arg_X *a)
 {
     REQUIRE_64BIT(ctx);
@@ -887,6 +1133,29 @@ static bool trans_PEXTD(DisasContext *ctx, arg_X *a)
     return true;
 }
 
+TRANS(ANDI_, do_addi_, false);
+TRANS(ANDIS_, do_addi_, true);
+TRANS(ORI, do_ori, false);
+TRANS(ORIS, do_ori, true);
+TRANS(XORI, do_xori, false);
+TRANS(XORIS, do_xori, true);
+
+TRANS(AND, do_logical2, tcg_gen_and_tl);
+TRANS(ANDC, do_logical2, tcg_gen_andc_tl);
+TRANS(NAND, do_logical2, tcg_gen_nand_tl);
+TRANS(ORC, do_logical2, tcg_gen_orc_tl);
+TRANS(NOR, do_logical2, tcg_gen_nor_tl);
+TRANS(EQV, do_logical2, tcg_gen_eqv_tl);
+TRANS(EXTSB, do_logical1, tcg_gen_ext8s_tl);
+TRANS(EXTSH, do_logical1, tcg_gen_ext16s_tl);
+
+TRANS(CNTLZW, do_cntzw, tcg_gen_clzi_i32);
+TRANS_FLAGS2(ISA300, CNTTZW, do_cntzw, tcg_gen_ctzi_i32);
+
+TRANS64(EXTSW, do_logical1, tcg_gen_ext32s_tl);
+TRANS64(CNTLZD, do_cntzd, tcg_gen_clzi_i64);
+TRANS64_FLAGS2(ISA300, CNTTZD, do_cntzd, tcg_gen_ctzi_i64);
+
 static bool trans_ADDG6S(DisasContext *ctx, arg_X *a)
 {
     const target_ulong carry_bits = (target_ulong)-1 / 0xf;
-- 
2.39.3



^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/8] target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions to decodetree.
  2024-04-16  6:39 ` [PATCH 1/8] target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions " Chinmay Rath
@ 2024-04-16 17:56   ` Richard Henderson
  2024-04-19  9:08     ` Chinmay Rath
  0 siblings, 1 reply; 27+ messages in thread
From: Richard Henderson @ 2024-04-16 17:56 UTC (permalink / raw)
  To: Chinmay Rath, qemu-ppc; +Cc: qemu-devel, npiggin, danielhb413, harshpb

On 4/15/24 23:39, Chinmay Rath wrote:
> Moving the following instructions to decodetree specification :
> 	mulli                   	: D-form
> 	mul{lw, lwo, hw, hwu}[.]	: XO-form
> 
> The changes were verified by validating that the tcg ops generated by those
> instructions remain the same, which were captured with the '-d in_asm,op' flag.
> 
> Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
> ---
>   target/ppc/insn32.decode                   |  9 +++
>   target/ppc/translate.c                     | 89 ----------------------
>   target/ppc/translate/fixedpoint-impl.c.inc | 71 +++++++++++++++++
>   3 files changed, 80 insertions(+), 89 deletions(-)

This is an accurate reorg of the current code, so
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

However, as follow-up, the code generation could be cleaned up:


> +static bool trans_MULLW(DisasContext *ctx, arg_MULLW *a)
> +{
> +#if defined(TARGET_PPC64)
> +    TCGv_i64 t0, t1;
> +    t0 = tcg_temp_new_i64();
> +    t1 = tcg_temp_new_i64();
> +    tcg_gen_ext32s_tl(t0, cpu_gpr[a->ra]);
> +    tcg_gen_ext32s_tl(t1, cpu_gpr[a->rb]);
> +    tcg_gen_mul_i64(cpu_gpr[a->rt], t0, t1);
> +#else
> +    tcg_gen_mul_i32(cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb]);
> +#endif
> +    if (unlikely(a->rc)) {
> +        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
> +    }
> +    return true;
> +}

Without ifdefs:

     TCGv t0 = tcg_temp_new();
     TCGv t1 = tcg_temp_new();

     tcg_gen_ext32s_tl(t0, ra);
     tcg_gen_ext32s_tl(t1, rb);
     tcg_gen_mul_tl(rt, t0, t1);

For ppc32, ext32s_tl will turn into a mov, which will be optimized away.  So ideal code 
generation for both modes.


> +static bool trans_MULLWO(DisasContext *ctx, arg_MULLWO *a)
> +{
> +    TCGv_i32 t0 = tcg_temp_new_i32();
> +    TCGv_i32 t1 = tcg_temp_new_i32();
> +
> +    tcg_gen_trunc_tl_i32(t0, cpu_gpr[a->ra]);
> +    tcg_gen_trunc_tl_i32(t1, cpu_gpr[a->rb]);
> +    tcg_gen_muls2_i32(t0, t1, t0, t1);
> +#if defined(TARGET_PPC64)
> +    tcg_gen_concat_i32_i64(cpu_gpr[a->rt], t0, t1);
> +#else
> +    tcg_gen_mov_i32(cpu_gpr[a->rt], t0);
> +#endif
> +
> +    tcg_gen_sari_i32(t0, t0, 31);
> +    tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1);
> +    tcg_gen_extu_i32_tl(cpu_ov, t0);

Usually hosts need to create the full 64-bit product and then break it apart for 
tcg_gen_muls2_i32, so split followed immediately by concatenate isn't great.


     TCGv t0 = tcg_temp_new();
     TCGv t1 = tcg_temp_new();

#ifdef TARGET_PPC64
     tcg_gen_ext32s_i64(t0, ra);
     tcg_gen_ext32s_i64(t1, rb);
     tcg_gen_mul_i64(rt, t0, t1);
     tcg_gen_sextract_i64(t0, rt, 31, 1);
     tcg_gen_sari_i64(t1, rt, 32);
#else
     tcg_gen_muls2_i32(rt, t1, ra, rb);
     tcg_gen_sari_i32(t0, rt, 31);
#endif
     tcg_gen_setcond_tl(TCG_COND_NE, cpu_ov, t0, t1);


> +    if (is_isa300(ctx)) {
> +        tcg_gen_mov_tl(cpu_ov32, cpu_ov);
> +    }
> +    tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
> +
> +    if (unlikely(a->rc)) {
> +        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
> +    }
> +    return true;
> +}


r~


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/8] target/ppc: Make divw[u] handler method decodetree compatible.
  2024-04-16  6:39 ` [PATCH 2/8] target/ppc: Make divw[u] handler method decodetree compatible Chinmay Rath
@ 2024-04-16 17:57   ` Richard Henderson
  2024-04-19  9:17     ` Chinmay Rath
  0 siblings, 1 reply; 27+ messages in thread
From: Richard Henderson @ 2024-04-16 17:57 UTC (permalink / raw)
  To: Chinmay Rath, qemu-ppc; +Cc: qemu-devel, npiggin, danielhb413, harshpb

On 4/15/24 23:39, Chinmay Rath wrote:
> The handler methods for divw[u] instructions internally use Rc(ctx->opcode),
> for extraction of Rc field of instructions, which poses a problem if we move
> the above said instructions to decodetree, as the ctx->opcode field is not
> populated in decodetree. Hence, making it decodetree compatible, so that the
> mentioned insns can be safely moved to decodetree specs.
> 
> Signed-off-by: Chinmay Rath<rathc@linux.ibm.com>
> ---
>   target/ppc/translate.c | 9 +++++----
>   1 file changed, 5 insertions(+), 4 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

> +static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret,
> +                                     TCGv arg1, TCGv arg2, bool sign,
> +                                     bool compute_ov, bool compute_rc0)

Could drop the inline at the same time.
Let the compiler decide.


r~


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/8] target/ppc: Move divw[u, e, eu] instructions to decodetree.
  2024-04-16  6:39 ` [PATCH 3/8] target/ppc: Move divw[u, e, eu] instructions to decodetree Chinmay Rath
@ 2024-04-16 18:19   ` Richard Henderson
  2024-04-19  9:18     ` Chinmay Rath
  0 siblings, 1 reply; 27+ messages in thread
From: Richard Henderson @ 2024-04-16 18:19 UTC (permalink / raw)
  To: Chinmay Rath, qemu-ppc; +Cc: qemu-devel, npiggin, danielhb413, harshpb

On 4/15/24 23:39, Chinmay Rath wrote:
> Moving the following instructions to decodetree specification :
> 	 divw[u, e, eu][o][.] 	: XO-form
> 
> The changes were verified by validating that the tcg ops generated by those
> instructions remain the same, which were captured with the '-d in_asm,op' flag.
> 
> Signed-off-by: Chinmay Rath<rathc@linux.ibm.com>
> ---
>   target/ppc/helper.h                        |  4 +--
>   target/ppc/insn32.decode                   |  5 ++++
>   target/ppc/int_helper.c                    |  4 +--
>   target/ppc/translate.c                     | 31 ----------------------
>   target/ppc/translate/fixedpoint-impl.c.inc | 24 +++++++++++++++++
>   5 files changed, 33 insertions(+), 35 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 4/8] target/ppc: Move neg, darn, mod{sw, uw} to decodetree.
  2024-04-16  6:39 ` [PATCH 4/8] target/ppc: Move neg, darn, mod{sw, uw} " Chinmay Rath
@ 2024-04-16 18:25   ` Richard Henderson
  2024-04-19  9:18     ` Chinmay Rath
  0 siblings, 1 reply; 27+ messages in thread
From: Richard Henderson @ 2024-04-16 18:25 UTC (permalink / raw)
  To: Chinmay Rath, qemu-ppc; +Cc: qemu-devel, npiggin, danielhb413, harshpb

On 4/15/24 23:39, Chinmay Rath wrote:
> Moving the below instructions to decodetree specification :
> 
> 	neg[o][.]       	: XO-form
> 	mod{sw, uw}, darn	: X-form
> 
> The changes were verified by validating that the tcg ops generated by those
> instructions remain the same, which were captured with the '-d in_asm,op' flag.
> 
> Signed-off-by: Chinmay Rath<rathc@linux.ibm.com>
> ---
>   target/ppc/helper.h                        |  4 +-
>   target/ppc/insn32.decode                   |  8 ++++
>   target/ppc/int_helper.c                    |  4 +-
>   target/ppc/translate.c                     | 56 ----------------------
>   target/ppc/translate/fixedpoint-impl.c.inc | 44 +++++++++++++++++
>   5 files changed, 56 insertions(+), 60 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 5/8] target/ppc: Move multiply fixed-point insns (64-bit operands) to decodetree.
  2024-04-16  6:39 ` [PATCH 5/8] target/ppc: Move multiply fixed-point insns (64-bit operands) " Chinmay Rath
@ 2024-04-16 18:36   ` Richard Henderson
  2024-04-19  9:25     ` Chinmay Rath
  0 siblings, 1 reply; 27+ messages in thread
From: Richard Henderson @ 2024-04-16 18:36 UTC (permalink / raw)
  To: Chinmay Rath, qemu-ppc; +Cc: qemu-devel, npiggin, danielhb413, harshpb

On 4/15/24 23:39, Chinmay Rath wrote:
> +static bool trans_MADDHDU(DisasContext *ctx, arg_MADDHDU *a)
...
> +    tcg_gen_movi_i64(t1, 0);

Drop the movi.

> +    tcg_gen_add2_i64(t1, cpu_gpr[a->vrt], lo, hi, cpu_gpr[a->rc], t1);

Use tcg_constant_i64(0).

With that,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 6/8] target/ppc: Move div/mod fixed-point insns (64 bits operands) to decodetree.
  2024-04-16  6:39 ` [PATCH 6/8] target/ppc: Move div/mod fixed-point insns (64 bits " Chinmay Rath
@ 2024-04-16 18:38   ` Richard Henderson
  2024-04-19  9:26     ` Chinmay Rath
  0 siblings, 1 reply; 27+ messages in thread
From: Richard Henderson @ 2024-04-16 18:38 UTC (permalink / raw)
  To: Chinmay Rath, qemu-ppc; +Cc: qemu-devel, npiggin, danielhb413, harshpb

On 4/15/24 23:39, Chinmay Rath wrote:
> Moving the below instructions to decodetree specification :
> 
> 	divd[u, e, eu][o][.]	: XO-form
> 	mod{sd, ud}		: X-form
> 
> With this patch, all the fixed-point arithmetic instructions have been
> moved to decodetree.
> The changes were verified by validating that the tcg ops generated by those
> instructions remain the same, which were captured using the '-d in_asm,op' flag.
> Also, renamed do_divwe method in fixedpoint-impl.c.inc to do_dive because it is
> now used to divide doubleword operands as well, and not just words.
> 
> Signed-off-by: Chinmay Rath<rathc@linux.ibm.com>
> ---
>   target/ppc/helper.h                        |  4 +-
>   target/ppc/insn32.decode                   |  8 +++
>   target/ppc/int_helper.c                    |  4 +-
>   target/ppc/translate.c                     | 65 ++--------------------
>   target/ppc/translate/fixedpoint-impl.c.inc | 29 +++++++++-
>   5 files changed, 42 insertions(+), 68 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 7/8] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree.
  2024-04-16  6:39 ` [PATCH 7/8] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions " Chinmay Rath
@ 2024-04-16 19:20   ` Richard Henderson
  2024-04-19  9:28     ` Chinmay Rath
  0 siblings, 1 reply; 27+ messages in thread
From: Richard Henderson @ 2024-04-16 19:20 UTC (permalink / raw)
  To: Chinmay Rath, qemu-ppc; +Cc: qemu-devel, npiggin, danielhb413, harshpb

On 4/15/24 23:39, Chinmay Rath wrote:
> Moving the following instructions to decodetree specification :
> 
> 	cmp{rb, eqb}, t{w, d}	: X-form
> 	t{w, d}i		: D-form
> 	isel			: A-form
> 
> The changes were verified by validating that the tcg ops generated by those
> instructions remain the same, which were captured using the '-d in_asm,op' flag.
> 
> Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>

A faithful reorg of the existing code, so,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

Notes for improvement:

> +static bool trans_CMPRB(DisasContext *ctx, arg_CMPRB *a)
> +{
> +    TCGv_i32 src1 = tcg_temp_new_i32();
> +    TCGv_i32 src2 = tcg_temp_new_i32();
> +    TCGv_i32 src2lo = tcg_temp_new_i32();
> +    TCGv_i32 src2hi = tcg_temp_new_i32();
> +    TCGv_i32 crf = cpu_crf[a->bf];
> +
> +    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
> +    tcg_gen_trunc_tl_i32(src1, cpu_gpr[a->ra]);
> +    tcg_gen_trunc_tl_i32(src2, cpu_gpr[a->rb]);
> +
> +    tcg_gen_andi_i32(src1, src1, 0xFF);
> +    tcg_gen_ext8u_i32(src2lo, src2);
> +    tcg_gen_shri_i32(src2, src2, 8);
> +    tcg_gen_ext8u_i32(src2hi, src2);

tcg_gen_extract_i32(src2hi, src2, 8, 8);

> +
> +    tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1);
> +    tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi);
> +    tcg_gen_and_i32(crf, src2lo, src2hi);
> +
> +    if (a->l) {
> +        tcg_gen_shri_i32(src2, src2, 8);
> +        tcg_gen_ext8u_i32(src2lo, src2);

tcg_gen_extract_i32(src2lo, src2, 16, 8);

> +        tcg_gen_shri_i32(src2, src2, 8);
> +        tcg_gen_ext8u_i32(src2hi, src2);

tcg_gen_extract_i32(src2hi, src2, 24, 8);

> +/*
> + * Fixed-Point Trap Instructions
> + */
> +
> +static bool trans_TW(DisasContext *ctx, arg_TW *a)
> +{
> +    TCGv_i32 t0;
> +
> +    if (check_unconditional_trap(ctx, a->rt)) {
> +        return true;
> +    }
> +    t0 = tcg_constant_i32(a->rt);
> +    gen_helper_TW(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0);
> +    return true;
> +}
> +
> +static bool trans_TWI(DisasContext *ctx, arg_TWI *a)
> +{
> +    TCGv t0;
> +    TCGv_i32 t1;
> +
> +    if (check_unconditional_trap(ctx, a->rt)) {
> +        return true;
> +    }
> +    t0 = tcg_constant_tl(a->si);
> +    t1 = tcg_constant_i32(a->rt);
> +    gen_helper_TW(tcg_env, cpu_gpr[a->ra], t0, t1);
> +    return true;
> +}
> +
> +static bool trans_TD(DisasContext *ctx, arg_TD *a)
> +{
> +    TCGv_i32 t0;
> +
> +    REQUIRE_64BIT(ctx);
> +    if (check_unconditional_trap(ctx, a->rt)) {
> +        return true;
> +    }
> +    t0 = tcg_constant_i32(a->rt);
> +    gen_helper_TD(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0);
> +    return true;
> +}
> +
> +static bool trans_TDI(DisasContext *ctx, arg_TDI *a)
> +{
> +    TCGv t0;
> +    TCGv_i32 t1;
> +
> +    REQUIRE_64BIT(ctx);
> +    if (check_unconditional_trap(ctx, a->rt)) {
> +        return true;
> +    }
> +    t0 = tcg_constant_tl(a->si);
> +    t1 = tcg_constant_i32(a->rt);
> +    gen_helper_TD(tcg_env, cpu_gpr[a->ra], t0, t1);
> +    return true;
> +}

See target/sparc/translate.c, delay_exception, for a method of implementing 
compare-and-trap inline with no inline branch penalty.

static bool do_conditional_trap(DisasContext *ctx, unsigned to, TCGv a, TCGv b)
{
     static const TCGCond ucond[8] = {
         TCG_COND_NEVER, TCG_COND_GTU, TCG_COND_LTU, TCG_COND_NE,
         TCG_COND_EQ,    TCG_COND_GEU, TCG_COND_LEU, TCG_COND_ALWAYS,
     };
     static const TCGCond scond[8] = {
         TCG_COND_NEVER, TCG_COND_EQ,  TCG_COND_GT,  TCG_COND_GE,
         TCG_COND_LT,    TCG_COND_LE,  TCG_COND_NE,  TCG_COND_ALWAYS,
     };

     TCGCond uc = ucond[to & 7];
     TCGCond sc = scond[to >> 2];

     /* There is overlap with EQ; we may not need both comparisons. */
     if (!(to & 0x18)) {
         sc = TCG_COND_NEVER;
     } else if (!(to & 0x03)) {
         uc = TCG_COND_NEVER;
     }

     if (uc == TCG_COND_ALWAYS || sc == TCG_COND_ALWAYS) {
         unconditional trap;
         return true;
     }
     if (uc == TCG_COND_NEVER && sc == TCG_COND_NEVER) {
         return true;
     }

     e = delay_exception(ctx, POWERPC_EXCP_TRAP);

     if (uc != TCG_COND_NEVER) {
         tcg_gen_brcond_tl(uc, a, b, e->lab);
     }
     if (sc != TCG_COND_NEVER) {
         tcg_gen_brcond_tl(sc, a, b, e->lab);
     }
     return true;
}

bool trans_TW(...)
{
     TCGv a = tcg_temp_new();
     TCGv b = tcg_temp_new();

     /* Note that consistent sign extensions work for unsigned comparisons. */
     tcg_gen_exts_i32_tl(a, ra);
     tcg_gen_exts_i32_tl(b, rb);
     return do_conditional_trap(ctx, to, a, b);
}

etc.


r~


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 8/8] target/ppc: Move logical fixed-point instructions to decodetree.
  2024-04-16  6:39 ` [PATCH 8/8] target/ppc: Move logical fixed-point " Chinmay Rath
@ 2024-04-16 19:35   ` Richard Henderson
  2024-04-19  9:29     ` Chinmay Rath
  0 siblings, 1 reply; 27+ messages in thread
From: Richard Henderson @ 2024-04-16 19:35 UTC (permalink / raw)
  To: Chinmay Rath, qemu-ppc; +Cc: qemu-devel, npiggin, danielhb413, harshpb

On 4/15/24 23:39, Chinmay Rath wrote:
> Moving the below instructions to decodetree specification :
> 
> 	andi[s]., {ori, xori}[s]			: D-form
> 
> 	{and, andc, nand, or, orc, nor, xor, eqv}[.],
> 	exts{b, h, w}[.],  cnt{l, t}z{w, d}[.],
> 	popcnt{b, w, d},  prty{w, d}, cmp, bpermd	: X-form
> 
> With this patch, all the fixed-point logical instructions have been
> moved to decodetree.
> The changes were verified by validating that the tcg ops generated by those
> instructions remain the same, which were captured with the '-d in_asm,op' flag.
> 
> Signed-off-by: Chinmay Rath<rathc@linux.ibm.com>
> ---
>   target/ppc/helper.h                        |   8 +-
>   target/ppc/insn32.decode                   |  38 +++
>   target/ppc/int_helper.c                    |  10 +-
>   target/ppc/translate.c                     | 359 ---------------------
>   target/ppc/translate/fixedpoint-impl.c.inc | 269 +++++++++++++++
>   5 files changed, 316 insertions(+), 368 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/8] target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions to decodetree.
  2024-04-16 17:56   ` Richard Henderson
@ 2024-04-19  9:08     ` Chinmay Rath
  0 siblings, 0 replies; 27+ messages in thread
From: Chinmay Rath @ 2024-04-19  9:08 UTC (permalink / raw)
  To: Richard Henderson, Chinmay Rath, qemu-ppc
  Cc: qemu-devel, npiggin, danielhb413, harshpb

Hi Richard,

On 4/16/24 23:26, Richard Henderson wrote:
> On 4/15/24 23:39, Chinmay Rath wrote:
>> Moving the following instructions to decodetree specification :
>>     mulli                       : D-form
>>     mul{lw, lwo, hw, hwu}[.]    : XO-form
>>
>> The changes were verified by validating that the tcg ops generated by 
>> those
>> instructions remain the same, which were captured with the '-d 
>> in_asm,op' flag.
>>
>> Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
>> ---
>>   target/ppc/insn32.decode                   |  9 +++
>>   target/ppc/translate.c                     | 89 ----------------------
>>   target/ppc/translate/fixedpoint-impl.c.inc | 71 +++++++++++++++++
>>   3 files changed, 80 insertions(+), 89 deletions(-)
>
> This is an accurate reorg of the current code, so
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

Thank you.
>
> However, as follow-up, the code generation could be cleaned up:
>
>
>> +static bool trans_MULLW(DisasContext *ctx, arg_MULLW *a)
>> +{
>> +#if defined(TARGET_PPC64)
>> +    TCGv_i64 t0, t1;
>> +    t0 = tcg_temp_new_i64();
>> +    t1 = tcg_temp_new_i64();
>> +    tcg_gen_ext32s_tl(t0, cpu_gpr[a->ra]);
>> +    tcg_gen_ext32s_tl(t1, cpu_gpr[a->rb]);
>> +    tcg_gen_mul_i64(cpu_gpr[a->rt], t0, t1);
>> +#else
>> +    tcg_gen_mul_i32(cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb]);
>> +#endif
>> +    if (unlikely(a->rc)) {
>> +        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
>> +    }
>> +    return true;
>> +}
>
> Without ifdefs:
>
>     TCGv t0 = tcg_temp_new();
>     TCGv t1 = tcg_temp_new();
>
>     tcg_gen_ext32s_tl(t0, ra);
>     tcg_gen_ext32s_tl(t1, rb);
>     tcg_gen_mul_tl(rt, t0, t1);
>
> For ppc32, ext32s_tl will turn into a mov, which will be optimized 
> away.  So ideal code generation for both modes.
>
>
>> +static bool trans_MULLWO(DisasContext *ctx, arg_MULLWO *a)
>> +{
>> +    TCGv_i32 t0 = tcg_temp_new_i32();
>> +    TCGv_i32 t1 = tcg_temp_new_i32();
>> +
>> +    tcg_gen_trunc_tl_i32(t0, cpu_gpr[a->ra]);
>> +    tcg_gen_trunc_tl_i32(t1, cpu_gpr[a->rb]);
>> +    tcg_gen_muls2_i32(t0, t1, t0, t1);
>> +#if defined(TARGET_PPC64)
>> +    tcg_gen_concat_i32_i64(cpu_gpr[a->rt], t0, t1);
>> +#else
>> +    tcg_gen_mov_i32(cpu_gpr[a->rt], t0);
>> +#endif
>> +
>> +    tcg_gen_sari_i32(t0, t0, 31);
>> +    tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1);
>> +    tcg_gen_extu_i32_tl(cpu_ov, t0);
>
> Usually hosts need to create the full 64-bit product and then break it 
> apart for tcg_gen_muls2_i32, so split followed immediately by 
> concatenate isn't great.
>
>
>     TCGv t0 = tcg_temp_new();
>     TCGv t1 = tcg_temp_new();
>
> #ifdef TARGET_PPC64
>     tcg_gen_ext32s_i64(t0, ra);
>     tcg_gen_ext32s_i64(t1, rb);
>     tcg_gen_mul_i64(rt, t0, t1);
>     tcg_gen_sextract_i64(t0, rt, 31, 1);
>     tcg_gen_sari_i64(t1, rt, 32);
> #else
>     tcg_gen_muls2_i32(rt, t1, ra, rb);
>     tcg_gen_sari_i32(t0, rt, 31);
> #endif
>     tcg_gen_setcond_tl(TCG_COND_NE, cpu_ov, t0, t1);
>
Sure, will update in v2.
Thanks,
Chinmay
>
>> +    if (is_isa300(ctx)) {
>> +        tcg_gen_mov_tl(cpu_ov32, cpu_ov);
>> +    }
>> +    tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
>> +
>> +    if (unlikely(a->rc)) {
>> +        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
>> +    }
>> +    return true;
>> +}
>
>
> r~
>



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/8] target/ppc: Make divw[u] handler method decodetree compatible.
  2024-04-16 17:57   ` Richard Henderson
@ 2024-04-19  9:17     ` Chinmay Rath
  0 siblings, 0 replies; 27+ messages in thread
From: Chinmay Rath @ 2024-04-19  9:17 UTC (permalink / raw)
  To: Richard Henderson, Chinmay Rath, qemu-ppc
  Cc: qemu-devel, npiggin, danielhb413, harshpb

Hi Richard,

On 4/16/24 23:27, Richard Henderson wrote:
> On 4/15/24 23:39, Chinmay Rath wrote:
>> The handler methods for divw[u] instructions internally use 
>> Rc(ctx->opcode),
>> for extraction of Rc field of instructions, which poses a problem if 
>> we move
>> the above said instructions to decodetree, as the ctx->opcode field 
>> is not
>> populated in decodetree. Hence, making it decodetree compatible, so 
>> that the
>> mentioned insns can be safely moved to decodetree specs.
>>
>> Signed-off-by: Chinmay Rath<rathc@linux.ibm.com>
>> ---
>>   target/ppc/translate.c | 9 +++++----
>>   1 file changed, 5 insertions(+), 4 deletions(-)
>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>
Thank you.
>> +static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret,
>> +                                     TCGv arg1, TCGv arg2, bool sign,
>> +                                     bool compute_ov, bool compute_rc0)
>
> Could drop the inline at the same time.
> Let the compiler decide.
>
I kept inline as is, as there are multiple gen_op_* routines with inline and
if necessary we could consider removing inline for all of them together 
in a separate patch :

grep inline target/ppc/translate.c | grep gen_op

static inline void gen_op_cmp(TCGv arg0, TCGv arg1, int s, int crf)
static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf)
static inline void gen_op_cmp32(TCGv arg0, TCGv arg1, int s, int crf)
static inline void gen_op_cmpi32(TCGv arg0, target_ulong arg1, int s, 
int crf)
static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
static inline void gen_op_arith_compute_ca32(DisasContext *ctx,
static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret,
static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret,
static inline void gen_op_arith_modw(DisasContext *ctx, TCGv ret, TCGv arg1,
static inline void gen_op_arith_modd(DisasContext *ctx, TCGv ret, TCGv arg1,
static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1,
static inline void gen_op_mfspr(DisasContext *ctx)

Thanks,
Chinmay
>
> r~



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/8] target/ppc: Move divw[u, e, eu] instructions to decodetree.
  2024-04-16 18:19   ` Richard Henderson
@ 2024-04-19  9:18     ` Chinmay Rath
  0 siblings, 0 replies; 27+ messages in thread
From: Chinmay Rath @ 2024-04-19  9:18 UTC (permalink / raw)
  To: Richard Henderson, Chinmay Rath, qemu-ppc
  Cc: qemu-devel, npiggin, danielhb413, harshpb

On 4/16/24 23:49, Richard Henderson wrote:
> On 4/15/24 23:39, Chinmay Rath wrote:
>> Moving the following instructions to decodetree specification :
>>      divw[u, e, eu][o][.]     : XO-form
>>
>> The changes were verified by validating that the tcg ops generated by 
>> those
>> instructions remain the same, which were captured with the '-d 
>> in_asm,op' flag.
>>
>> Signed-off-by: Chinmay Rath<rathc@linux.ibm.com>
>> ---
>>   target/ppc/helper.h                        |  4 +--
>>   target/ppc/insn32.decode                   |  5 ++++
>>   target/ppc/int_helper.c                    |  4 +--
>>   target/ppc/translate.c                     | 31 ----------------------
>>   target/ppc/translate/fixedpoint-impl.c.inc | 24 +++++++++++++++++
>>   5 files changed, 33 insertions(+), 35 deletions(-)
>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>
Thanks,
Chinmay
>
> r~



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 4/8] target/ppc: Move neg, darn, mod{sw, uw} to decodetree.
  2024-04-16 18:25   ` Richard Henderson
@ 2024-04-19  9:18     ` Chinmay Rath
  0 siblings, 0 replies; 27+ messages in thread
From: Chinmay Rath @ 2024-04-19  9:18 UTC (permalink / raw)
  To: Richard Henderson, Chinmay Rath, qemu-ppc
  Cc: qemu-devel, npiggin, danielhb413, harshpb



On 4/16/24 23:55, Richard Henderson wrote:
> On 4/15/24 23:39, Chinmay Rath wrote:
>> Moving the below instructions to decodetree specification :
>>
>>     neg[o][.]           : XO-form
>>     mod{sw, uw}, darn    : X-form
>>
>> The changes were verified by validating that the tcg ops generated by 
>> those
>> instructions remain the same, which were captured with the '-d 
>> in_asm,op' flag.
>>
>> Signed-off-by: Chinmay Rath<rathc@linux.ibm.com>
>> ---
>>   target/ppc/helper.h                        |  4 +-
>>   target/ppc/insn32.decode                   |  8 ++++
>>   target/ppc/int_helper.c                    |  4 +-
>>   target/ppc/translate.c                     | 56 ----------------------
>>   target/ppc/translate/fixedpoint-impl.c.inc | 44 +++++++++++++++++
>>   5 files changed, 56 insertions(+), 60 deletions(-)
>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Thanks,
Chinmay
>
> r~



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 5/8] target/ppc: Move multiply fixed-point insns (64-bit operands) to decodetree.
  2024-04-16 18:36   ` Richard Henderson
@ 2024-04-19  9:25     ` Chinmay Rath
  2024-04-20 15:51       ` Richard Henderson
  0 siblings, 1 reply; 27+ messages in thread
From: Chinmay Rath @ 2024-04-19  9:25 UTC (permalink / raw)
  To: Richard Henderson, Chinmay Rath, qemu-ppc
  Cc: qemu-devel, npiggin, danielhb413, harshpb

Hi Richard,

On 4/17/24 00:06, Richard Henderson wrote:
> On 4/15/24 23:39, Chinmay Rath wrote:
>> +static bool trans_MADDHDU(DisasContext *ctx, arg_MADDHDU *a)
> ...
>> +    tcg_gen_movi_i64(t1, 0);
>
> Drop the movi.
>
>> +    tcg_gen_add2_i64(t1, cpu_gpr[a->vrt], lo, hi, cpu_gpr[a->rc], t1);
>
> Use tcg_constant_i64(0).
>
Looks like tcg_gen_add2_i64 internally modifies the passed arguments, 
hence constant is not expected.
However, I tried using tcg_constant_i64(0) as suggested but this leads 
to an assert failure :
qemu-system-ppc64: ../tcg/tcg.c:5071: tcg_reg_alloc_op: Assertion 
`!temp_readonly(ts)' failed.

So I hope it is fine to keep the code change as is for now.
Let me know if you have any suggestions.

Thanks,
Chinmay
> With that,
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>
>
> r~



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 6/8] target/ppc: Move div/mod fixed-point insns (64 bits operands) to decodetree.
  2024-04-16 18:38   ` Richard Henderson
@ 2024-04-19  9:26     ` Chinmay Rath
  0 siblings, 0 replies; 27+ messages in thread
From: Chinmay Rath @ 2024-04-19  9:26 UTC (permalink / raw)
  To: Richard Henderson, Chinmay Rath, qemu-ppc
  Cc: qemu-devel, npiggin, danielhb413, harshpb



On 4/17/24 00:08, Richard Henderson wrote:
> On 4/15/24 23:39, Chinmay Rath wrote:
>> Moving the below instructions to decodetree specification :
>>
>>     divd[u, e, eu][o][.]    : XO-form
>>     mod{sd, ud}        : X-form
>>
>> With this patch, all the fixed-point arithmetic instructions have been
>> moved to decodetree.
>> The changes were verified by validating that the tcg ops generated by 
>> those
>> instructions remain the same, which were captured using the '-d 
>> in_asm,op' flag.
>> Also, renamed do_divwe method in fixedpoint-impl.c.inc to do_dive 
>> because it is
>> now used to divide doubleword operands as well, and not just words.
>>
>> Signed-off-by: Chinmay Rath<rathc@linux.ibm.com>
>> ---
>>   target/ppc/helper.h                        |  4 +-
>>   target/ppc/insn32.decode                   |  8 +++
>>   target/ppc/int_helper.c                    |  4 +-
>>   target/ppc/translate.c                     | 65 ++--------------------
>>   target/ppc/translate/fixedpoint-impl.c.inc | 29 +++++++++-
>>   5 files changed, 42 insertions(+), 68 deletions(-)
>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>
Thanks,
Chinmay
> r~



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 7/8] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree.
  2024-04-16 19:20   ` Richard Henderson
@ 2024-04-19  9:28     ` Chinmay Rath
  0 siblings, 0 replies; 27+ messages in thread
From: Chinmay Rath @ 2024-04-19  9:28 UTC (permalink / raw)
  To: Richard Henderson, Chinmay Rath, qemu-ppc
  Cc: qemu-devel, npiggin, danielhb413, harshpb

Hi Richard,

On 4/17/24 00:50, Richard Henderson wrote:
> On 4/15/24 23:39, Chinmay Rath wrote:
>> Moving the following instructions to decodetree specification :
>>
>>     cmp{rb, eqb}, t{w, d}    : X-form
>>     t{w, d}i        : D-form
>>     isel            : A-form
>>
>> The changes were verified by validating that the tcg ops generated by 
>> those
>> instructions remain the same, which were captured using the '-d 
>> in_asm,op' flag.
>>
>> Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
>
> A faithful reorg of the existing code, so,
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>
Thank you.
> Notes for improvement:
>
>> +static bool trans_CMPRB(DisasContext *ctx, arg_CMPRB *a)
>> +{
>> +    TCGv_i32 src1 = tcg_temp_new_i32();
>> +    TCGv_i32 src2 = tcg_temp_new_i32();
>> +    TCGv_i32 src2lo = tcg_temp_new_i32();
>> +    TCGv_i32 src2hi = tcg_temp_new_i32();
>> +    TCGv_i32 crf = cpu_crf[a->bf];
>> +
>> +    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
>> +    tcg_gen_trunc_tl_i32(src1, cpu_gpr[a->ra]);
>> +    tcg_gen_trunc_tl_i32(src2, cpu_gpr[a->rb]);
>> +
>> +    tcg_gen_andi_i32(src1, src1, 0xFF);
>> +    tcg_gen_ext8u_i32(src2lo, src2);
>> +    tcg_gen_shri_i32(src2, src2, 8);
>> +    tcg_gen_ext8u_i32(src2hi, src2);
>
> tcg_gen_extract_i32(src2hi, src2, 8, 8);
>
>> +
>> +    tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1);
>> +    tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi);
>> +    tcg_gen_and_i32(crf, src2lo, src2hi);
>> +
>> +    if (a->l) {
>> +        tcg_gen_shri_i32(src2, src2, 8);
>> +        tcg_gen_ext8u_i32(src2lo, src2);
>
> tcg_gen_extract_i32(src2lo, src2, 16, 8);
>
>> +        tcg_gen_shri_i32(src2, src2, 8);
>> +        tcg_gen_ext8u_i32(src2hi, src2);
>
> tcg_gen_extract_i32(src2hi, src2, 24, 8);
>
Will update the above in v2.

Will implement the below improvements for trap insns as a separate patch 
later.

>> +/*
>> + * Fixed-Point Trap Instructions
>> + */
>> +
>> +static bool trans_TW(DisasContext *ctx, arg_TW *a)
>> +{
>> +    TCGv_i32 t0;
>> +
>> +    if (check_unconditional_trap(ctx, a->rt)) {
>> +        return true;
>> +    }
>> +    t0 = tcg_constant_i32(a->rt);
>> +    gen_helper_TW(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0);
>> +    return true;
>> +}
>> +
>> +static bool trans_TWI(DisasContext *ctx, arg_TWI *a)
>> +{
>> +    TCGv t0;
>> +    TCGv_i32 t1;
>> +
>> +    if (check_unconditional_trap(ctx, a->rt)) {
>> +        return true;
>> +    }
>> +    t0 = tcg_constant_tl(a->si);
>> +    t1 = tcg_constant_i32(a->rt);
>> +    gen_helper_TW(tcg_env, cpu_gpr[a->ra], t0, t1);
>> +    return true;
>> +}
>> +
>> +static bool trans_TD(DisasContext *ctx, arg_TD *a)
>> +{
>> +    TCGv_i32 t0;
>> +
>> +    REQUIRE_64BIT(ctx);
>> +    if (check_unconditional_trap(ctx, a->rt)) {
>> +        return true;
>> +    }
>> +    t0 = tcg_constant_i32(a->rt);
>> +    gen_helper_TD(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0);
>> +    return true;
>> +}
>> +
>> +static bool trans_TDI(DisasContext *ctx, arg_TDI *a)
>> +{
>> +    TCGv t0;
>> +    TCGv_i32 t1;
>> +
>> +    REQUIRE_64BIT(ctx);
>> +    if (check_unconditional_trap(ctx, a->rt)) {
>> +        return true;
>> +    }
>> +    t0 = tcg_constant_tl(a->si);
>> +    t1 = tcg_constant_i32(a->rt);
>> +    gen_helper_TD(tcg_env, cpu_gpr[a->ra], t0, t1);
>> +    return true;
>> +}
>
> See target/sparc/translate.c, delay_exception, for a method of 
> implementing compare-and-trap inline with no inline branch penalty.
>
> static bool do_conditional_trap(DisasContext *ctx, unsigned to, TCGv 
> a, TCGv b)
> {
>     static const TCGCond ucond[8] = {
>         TCG_COND_NEVER, TCG_COND_GTU, TCG_COND_LTU, TCG_COND_NE,
>         TCG_COND_EQ,    TCG_COND_GEU, TCG_COND_LEU, TCG_COND_ALWAYS,
>     };
>     static const TCGCond scond[8] = {
>         TCG_COND_NEVER, TCG_COND_EQ,  TCG_COND_GT,  TCG_COND_GE,
>         TCG_COND_LT,    TCG_COND_LE,  TCG_COND_NE, TCG_COND_ALWAYS,
>     };
>
>     TCGCond uc = ucond[to & 7];
>     TCGCond sc = scond[to >> 2];
>
>     /* There is overlap with EQ; we may not need both comparisons. */
>     if (!(to & 0x18)) {
>         sc = TCG_COND_NEVER;
>     } else if (!(to & 0x03)) {
>         uc = TCG_COND_NEVER;
>     }
>
>     if (uc == TCG_COND_ALWAYS || sc == TCG_COND_ALWAYS) {
>         unconditional trap;
>         return true;
>     }
>     if (uc == TCG_COND_NEVER && sc == TCG_COND_NEVER) {
>         return true;
>     }
>
>     e = delay_exception(ctx, POWERPC_EXCP_TRAP);
>
>     if (uc != TCG_COND_NEVER) {
>         tcg_gen_brcond_tl(uc, a, b, e->lab);
>     }
>     if (sc != TCG_COND_NEVER) {
>         tcg_gen_brcond_tl(sc, a, b, e->lab);
>     }
>     return true;
> }
>
> bool trans_TW(...)
> {
>     TCGv a = tcg_temp_new();
>     TCGv b = tcg_temp_new();
>
>     /* Note that consistent sign extensions work for unsigned 
> comparisons. */
>     tcg_gen_exts_i32_tl(a, ra);
>     tcg_gen_exts_i32_tl(b, rb);
>     return do_conditional_trap(ctx, to, a, b);
> }
>
> etc.
>
>
Thanks,
Chinmay
> r~



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 8/8] target/ppc: Move logical fixed-point instructions to decodetree.
  2024-04-16 19:35   ` Richard Henderson
@ 2024-04-19  9:29     ` Chinmay Rath
  0 siblings, 0 replies; 27+ messages in thread
From: Chinmay Rath @ 2024-04-19  9:29 UTC (permalink / raw)
  To: Richard Henderson, Chinmay Rath, qemu-ppc
  Cc: qemu-devel, npiggin, danielhb413, harshpb



On 4/17/24 01:05, Richard Henderson wrote:
> On 4/15/24 23:39, Chinmay Rath wrote:
>> Moving the below instructions to decodetree specification :
>>
>>     andi[s]., {ori, xori}[s]            : D-form
>>
>>     {and, andc, nand, or, orc, nor, xor, eqv}[.],
>>     exts{b, h, w}[.],  cnt{l, t}z{w, d}[.],
>>     popcnt{b, w, d},  prty{w, d}, cmp, bpermd    : X-form
>>
>> With this patch, all the fixed-point logical instructions have been
>> moved to decodetree.
>> The changes were verified by validating that the tcg ops generated by 
>> those
>> instructions remain the same, which were captured with the '-d 
>> in_asm,op' flag.
>>
>> Signed-off-by: Chinmay Rath<rathc@linux.ibm.com>
>> ---
>>   target/ppc/helper.h                        |   8 +-
>>   target/ppc/insn32.decode                   |  38 +++
>>   target/ppc/int_helper.c                    |  10 +-
>>   target/ppc/translate.c                     | 359 ---------------------
>>   target/ppc/translate/fixedpoint-impl.c.inc | 269 +++++++++++++++
>>   5 files changed, 316 insertions(+), 368 deletions(-)
>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Thanks,
Chinmay
>
> r~
>



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 5/8] target/ppc: Move multiply fixed-point insns (64-bit operands) to decodetree.
  2024-04-19  9:25     ` Chinmay Rath
@ 2024-04-20 15:51       ` Richard Henderson
  2024-04-22  6:32         ` Chinmay Rath
  0 siblings, 1 reply; 27+ messages in thread
From: Richard Henderson @ 2024-04-20 15:51 UTC (permalink / raw)
  To: Chinmay Rath, Chinmay Rath, qemu-ppc
  Cc: qemu-devel, npiggin, danielhb413, harshpb

On 4/19/24 02:25, Chinmay Rath wrote:
> Hi Richard,
> 
> On 4/17/24 00:06, Richard Henderson wrote:
>> On 4/15/24 23:39, Chinmay Rath wrote:
>>> +static bool trans_MADDHDU(DisasContext *ctx, arg_MADDHDU *a)
>> ...
>>> +    tcg_gen_movi_i64(t1, 0);
>>
>> Drop the movi.
>>
>>> +    tcg_gen_add2_i64(t1, cpu_gpr[a->vrt], lo, hi, cpu_gpr[a->rc], t1);
>>
>> Use tcg_constant_i64(0).
>>
> Looks like tcg_gen_add2_i64 internally modifies the passed arguments, hence constant is 
> not expected.
> However, I tried using tcg_constant_i64(0) as suggested but this leads to an assert failure :
> qemu-system-ppc64: ../tcg/tcg.c:5071: tcg_reg_alloc_op: Assertion `!temp_readonly(ts)' 
> failed.

You misunderstood my suggestion.

   TCGv_i64 t1 = tcg_temp_new_i64();
   tcg_gen_add2_i64(t1, cpu_gpr[a->vrt], lo, hi, cpu_gpr[a->rc], tcg_constant_i64(0));


r~


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 5/8] target/ppc: Move multiply fixed-point insns (64-bit operands) to decodetree.
  2024-04-20 15:51       ` Richard Henderson
@ 2024-04-22  6:32         ` Chinmay Rath
  0 siblings, 0 replies; 27+ messages in thread
From: Chinmay Rath @ 2024-04-22  6:32 UTC (permalink / raw)
  To: Richard Henderson, Chinmay Rath, qemu-ppc
  Cc: qemu-devel, npiggin, danielhb413, harshpb

Hi Richard,

On 4/20/24 21:21, Richard Henderson wrote:
> On 4/19/24 02:25, Chinmay Rath wrote:
>> Hi Richard,
>>
>> On 4/17/24 00:06, Richard Henderson wrote:
>>> On 4/15/24 23:39, Chinmay Rath wrote:
>>>> +static bool trans_MADDHDU(DisasContext *ctx, arg_MADDHDU *a)
>>> ...
>>>> +    tcg_gen_movi_i64(t1, 0);
>>>
>>> Drop the movi.
>>>
>>>> +    tcg_gen_add2_i64(t1, cpu_gpr[a->vrt], lo, hi, cpu_gpr[a->rc], 
>>>> t1);
>>>
>>> Use tcg_constant_i64(0).
>>>
>> Looks like tcg_gen_add2_i64 internally modifies the passed arguments, 
>> hence constant is not expected.
>> However, I tried using tcg_constant_i64(0) as suggested but this 
>> leads to an assert failure :
>> qemu-system-ppc64: ../tcg/tcg.c:5071: tcg_reg_alloc_op: Assertion 
>> `!temp_readonly(ts)' failed.
>
> You misunderstood my suggestion.
>
>   TCGv_i64 t1 = tcg_temp_new_i64();
>   tcg_gen_add2_i64(t1, cpu_gpr[a->vrt], lo, hi, cpu_gpr[a->rc], 
> tcg_constant_i64(0));
>
Thank you for the clarification. Will add this to v2.
Regards,
Chinmay
>
> r~
>



^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2024-04-22  6:33 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-16  6:39 [PATCH 0/8] target/ppc: Move fixed-point insns to decodetree Chinmay Rath
2024-04-16  6:39 ` [PATCH 1/8] target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions " Chinmay Rath
2024-04-16 17:56   ` Richard Henderson
2024-04-19  9:08     ` Chinmay Rath
2024-04-16  6:39 ` [PATCH 2/8] target/ppc: Make divw[u] handler method decodetree compatible Chinmay Rath
2024-04-16 17:57   ` Richard Henderson
2024-04-19  9:17     ` Chinmay Rath
2024-04-16  6:39 ` [PATCH 3/8] target/ppc: Move divw[u, e, eu] instructions to decodetree Chinmay Rath
2024-04-16 18:19   ` Richard Henderson
2024-04-19  9:18     ` Chinmay Rath
2024-04-16  6:39 ` [PATCH 4/8] target/ppc: Move neg, darn, mod{sw, uw} " Chinmay Rath
2024-04-16 18:25   ` Richard Henderson
2024-04-19  9:18     ` Chinmay Rath
2024-04-16  6:39 ` [PATCH 5/8] target/ppc: Move multiply fixed-point insns (64-bit operands) " Chinmay Rath
2024-04-16 18:36   ` Richard Henderson
2024-04-19  9:25     ` Chinmay Rath
2024-04-20 15:51       ` Richard Henderson
2024-04-22  6:32         ` Chinmay Rath
2024-04-16  6:39 ` [PATCH 6/8] target/ppc: Move div/mod fixed-point insns (64 bits " Chinmay Rath
2024-04-16 18:38   ` Richard Henderson
2024-04-19  9:26     ` Chinmay Rath
2024-04-16  6:39 ` [PATCH 7/8] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions " Chinmay Rath
2024-04-16 19:20   ` Richard Henderson
2024-04-19  9:28     ` Chinmay Rath
2024-04-16  6:39 ` [PATCH 8/8] target/ppc: Move logical fixed-point " Chinmay Rath
2024-04-16 19:35   ` Richard Henderson
2024-04-19  9:29     ` Chinmay Rath

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.