All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/2] tcg: Add tcg_gen_mulsu2_*
@ 2016-09-27 21:23 Richard Henderson
  2016-09-27 21:23 ` [Qemu-devel] [PATCH 1/2] tcg: Add tcg_gen_mulsu2_{i32,i64,tl} Richard Henderson
  2016-09-27 21:23 ` [Qemu-devel] [PATCH 2/2] target-microblaze: Cleanup dec_mul Richard Henderson
  0 siblings, 2 replies; 4+ messages in thread
From: Richard Henderson @ 2016-09-27 21:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: Sagar Karandikar

While reviewing the recent riscv patch set, I made a suggestion to copy
some of the bits from tcg_gen_muls2_i64 in order to implement the mulhsu
instruction.

However, I noticed that the same operation is present in another target
(target-microblaze), so I thought that it would be better to have this as
a standard operation.


r~


Richard Henderson (2):
  tcg: Add tcg_gen_mulsu2_{i32,i64,tl}
  target-microblaze: Cleanup dec_mul

 target-microblaze/translate.c | 61 +++++++------------------------------------
 tcg/tcg-op.c                  | 43 ++++++++++++++++++++++++++++++
 tcg/tcg-op.h                  |  4 +++
 3 files changed, 56 insertions(+), 52 deletions(-)

-- 
2.5.5

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Qemu-devel] [PATCH 1/2] tcg: Add tcg_gen_mulsu2_{i32,i64,tl}
  2016-09-27 21:23 [Qemu-devel] [PATCH 0/2] tcg: Add tcg_gen_mulsu2_* Richard Henderson
@ 2016-09-27 21:23 ` Richard Henderson
  2016-09-27 21:23 ` [Qemu-devel] [PATCH 2/2] target-microblaze: Cleanup dec_mul Richard Henderson
  1 sibling, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2016-09-27 21:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: Sagar Karandikar

This multiply has one signed input (arg1) and one unsigned input (arg2),
producing the full double-width result.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/tcg-op.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 tcg/tcg-op.h |  4 ++++
 2 files changed, 47 insertions(+)

diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 291d50b..d92a6cd 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -688,6 +688,33 @@ void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
     }
 }
 
+void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        TCGv_i32 t1 = tcg_temp_new_i32();
+        TCGv_i32 t2 = tcg_temp_new_i32();
+        tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
+        /* Adjust for negative input for the signed arg1.  */
+        tcg_gen_sari_i32(t2, arg1, 31);
+        tcg_gen_and_i32(t2, t2, arg2);
+        tcg_gen_sub_i32(rh, t1, t2);
+        tcg_gen_mov_i32(rl, t0);
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
+        tcg_temp_free_i32(t2);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
+        tcg_gen_ext_i32_i64(t0, arg1);
+        tcg_gen_extu_i32_i64(t1, arg2);
+        tcg_gen_mul_i64(t0, t0, t1);
+        tcg_gen_extr_i64_i32(rl, rh, t0);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
+}
+
 void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg)
 {
     if (TCG_TARGET_HAS_ext8s_i32) {
@@ -1758,6 +1785,22 @@ void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
     }
 }
 
+void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+    tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
+    /* Adjust for negative input for the signed arg1.  */
+    tcg_gen_sari_i64(t2, arg1, 63);
+    tcg_gen_and_i64(t2, t2, arg2);
+    tcg_gen_sub_i64(rh, t1, t2);
+    tcg_gen_mov_i64(rl, t0);
+    tcg_temp_free_i64(t0);
+    tcg_temp_free_i64(t1);
+    tcg_temp_free_i64(t2);
+}
+
 /* Size changing operations.  */
 
 void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 02cb376..56407d2 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -306,6 +306,7 @@ void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
                       TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh);
 void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2);
 void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2);
 void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg);
 void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg);
 void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg);
@@ -482,6 +483,7 @@ void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
                       TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh);
 void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2);
 void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2);
 void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg);
 void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg);
 void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg);
@@ -932,6 +934,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_gen_sub2_tl tcg_gen_sub2_i64
 #define tcg_gen_mulu2_tl tcg_gen_mulu2_i64
 #define tcg_gen_muls2_tl tcg_gen_muls2_i64
+#define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i64
 #else
 #define tcg_gen_movi_tl tcg_gen_movi_i32
 #define tcg_gen_mov_tl tcg_gen_mov_i32
@@ -1009,6 +1012,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_gen_sub2_tl tcg_gen_sub2_i32
 #define tcg_gen_mulu2_tl tcg_gen_mulu2_i32
 #define tcg_gen_muls2_tl tcg_gen_muls2_i32
+#define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i32
 #endif
 
 #if UINTPTR_MAX == UINT32_MAX
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [Qemu-devel] [PATCH 2/2] target-microblaze: Cleanup dec_mul
  2016-09-27 21:23 [Qemu-devel] [PATCH 0/2] tcg: Add tcg_gen_mulsu2_* Richard Henderson
  2016-09-27 21:23 ` [Qemu-devel] [PATCH 1/2] tcg: Add tcg_gen_mulsu2_{i32,i64,tl} Richard Henderson
@ 2016-09-27 21:23 ` Richard Henderson
  2016-09-28  7:36   ` Edgar E. Iglesias
  1 sibling, 1 reply; 4+ messages in thread
From: Richard Henderson @ 2016-09-27 21:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: Sagar Karandikar, Edgar E. Iglesias

Use tcg_gen_mul_tl for muli and mul instructions.
Use tcg_gen_muls2_tl for mulh instruction.
Use tcg_gen_mulu2_tl for mulhu instruction.
Use tcg_gen_mulsu2_tl for mulhsu instruction.

Note that this last fixes a bug, in that mulhsu was
previously treating both operands as signed, instead
of treating rb as unsigned.

Cc: Edgar E. Iglesias <edgar.iglesias@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-microblaze/translate.c | 61 +++++++------------------------------------
 1 file changed, 9 insertions(+), 52 deletions(-)

diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
index 5274191..de2090a 100644
--- a/target-microblaze/translate.c
+++ b/target-microblaze/translate.c
@@ -581,50 +581,10 @@ static void dec_msr(DisasContext *dc)
     }
 }
 
-/* 64-bit signed mul, lower result in d and upper in d2.  */
-static void t_gen_muls(TCGv d, TCGv d2, TCGv a, TCGv b)
-{
-    TCGv_i64 t0, t1;
-
-    t0 = tcg_temp_new_i64();
-    t1 = tcg_temp_new_i64();
-
-    tcg_gen_ext_i32_i64(t0, a);
-    tcg_gen_ext_i32_i64(t1, b);
-    tcg_gen_mul_i64(t0, t0, t1);
-
-    tcg_gen_extrl_i64_i32(d, t0);
-    tcg_gen_shri_i64(t0, t0, 32);
-    tcg_gen_extrl_i64_i32(d2, t0);
-
-    tcg_temp_free_i64(t0);
-    tcg_temp_free_i64(t1);
-}
-
-/* 64-bit unsigned muls, lower result in d and upper in d2.  */
-static void t_gen_mulu(TCGv d, TCGv d2, TCGv a, TCGv b)
-{
-    TCGv_i64 t0, t1;
-
-    t0 = tcg_temp_new_i64();
-    t1 = tcg_temp_new_i64();
-
-    tcg_gen_extu_i32_i64(t0, a);
-    tcg_gen_extu_i32_i64(t1, b);
-    tcg_gen_mul_i64(t0, t0, t1);
-
-    tcg_gen_extrl_i64_i32(d, t0);
-    tcg_gen_shri_i64(t0, t0, 32);
-    tcg_gen_extrl_i64_i32(d2, t0);
-
-    tcg_temp_free_i64(t0);
-    tcg_temp_free_i64(t1);
-}
-
 /* Multiplier unit.  */
 static void dec_mul(DisasContext *dc)
 {
-    TCGv d[2];
+    TCGv tmp;
     unsigned int subcode;
 
     if ((dc->tb_flags & MSR_EE_FLAG)
@@ -636,13 +596,11 @@ static void dec_mul(DisasContext *dc)
     }
 
     subcode = dc->imm & 3;
-    d[0] = tcg_temp_new();
-    d[1] = tcg_temp_new();
 
     if (dc->type_b) {
         LOG_DIS("muli r%d r%d %x\n", dc->rd, dc->ra, dc->imm);
-        t_gen_mulu(cpu_R[dc->rd], d[1], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
-        goto done;
+        tcg_gen_mul_tl(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
+        return;
     }
 
     /* mulh, mulhsu and mulhu are not available if C_USE_HW_MUL is < 2.  */
@@ -651,30 +609,29 @@ static void dec_mul(DisasContext *dc)
         /* nop??? */
     }
 
+    tmp = tcg_temp_new();
     switch (subcode) {
         case 0:
             LOG_DIS("mul r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
-            t_gen_mulu(cpu_R[dc->rd], d[1], cpu_R[dc->ra], cpu_R[dc->rb]);
+            tcg_gen_mul_tl(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
             break;
         case 1:
             LOG_DIS("mulh r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
-            t_gen_muls(d[0], cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
+            tcg_gen_muls2_tl(tmp, cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
             break;
         case 2:
             LOG_DIS("mulhsu r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
-            t_gen_muls(d[0], cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
+            tcg_gen_mulsu2_tl(tmp, cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
             break;
         case 3:
             LOG_DIS("mulhu r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
-            t_gen_mulu(d[0], cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
+            tcg_gen_mulu2_tl(tmp, cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
             break;
         default:
             cpu_abort(CPU(dc->cpu), "unknown MUL insn %x\n", subcode);
             break;
     }
-done:
-    tcg_temp_free(d[0]);
-    tcg_temp_free(d[1]);
+    tcg_temp_free(tmp);
 }
 
 /* Div unit.  */
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [Qemu-devel] [PATCH 2/2] target-microblaze: Cleanup dec_mul
  2016-09-27 21:23 ` [Qemu-devel] [PATCH 2/2] target-microblaze: Cleanup dec_mul Richard Henderson
@ 2016-09-28  7:36   ` Edgar E. Iglesias
  0 siblings, 0 replies; 4+ messages in thread
From: Edgar E. Iglesias @ 2016-09-28  7:36 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, Sagar Karandikar

On Tue, Sep 27, 2016 at 02:23:53PM -0700, Richard Henderson wrote:
> Use tcg_gen_mul_tl for muli and mul instructions.
> Use tcg_gen_muls2_tl for mulh instruction.
> Use tcg_gen_mulu2_tl for mulhu instruction.
> Use tcg_gen_mulsu2_tl for mulhsu instruction.
> 
> Note that this last fixes a bug, in that mulhsu was
> previously treating both operands as signed, instead
> of treating rb as unsigned.

Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Tested-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>


> 
> Cc: Edgar E. Iglesias <edgar.iglesias@gmail.com>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  target-microblaze/translate.c | 61 +++++++------------------------------------
>  1 file changed, 9 insertions(+), 52 deletions(-)
> 
> diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
> index 5274191..de2090a 100644
> --- a/target-microblaze/translate.c
> +++ b/target-microblaze/translate.c
> @@ -581,50 +581,10 @@ static void dec_msr(DisasContext *dc)
>      }
>  }
>  
> -/* 64-bit signed mul, lower result in d and upper in d2.  */
> -static void t_gen_muls(TCGv d, TCGv d2, TCGv a, TCGv b)
> -{
> -    TCGv_i64 t0, t1;
> -
> -    t0 = tcg_temp_new_i64();
> -    t1 = tcg_temp_new_i64();
> -
> -    tcg_gen_ext_i32_i64(t0, a);
> -    tcg_gen_ext_i32_i64(t1, b);
> -    tcg_gen_mul_i64(t0, t0, t1);
> -
> -    tcg_gen_extrl_i64_i32(d, t0);
> -    tcg_gen_shri_i64(t0, t0, 32);
> -    tcg_gen_extrl_i64_i32(d2, t0);
> -
> -    tcg_temp_free_i64(t0);
> -    tcg_temp_free_i64(t1);
> -}
> -
> -/* 64-bit unsigned muls, lower result in d and upper in d2.  */
> -static void t_gen_mulu(TCGv d, TCGv d2, TCGv a, TCGv b)
> -{
> -    TCGv_i64 t0, t1;
> -
> -    t0 = tcg_temp_new_i64();
> -    t1 = tcg_temp_new_i64();
> -
> -    tcg_gen_extu_i32_i64(t0, a);
> -    tcg_gen_extu_i32_i64(t1, b);
> -    tcg_gen_mul_i64(t0, t0, t1);
> -
> -    tcg_gen_extrl_i64_i32(d, t0);
> -    tcg_gen_shri_i64(t0, t0, 32);
> -    tcg_gen_extrl_i64_i32(d2, t0);
> -
> -    tcg_temp_free_i64(t0);
> -    tcg_temp_free_i64(t1);
> -}
> -
>  /* Multiplier unit.  */
>  static void dec_mul(DisasContext *dc)
>  {
> -    TCGv d[2];
> +    TCGv tmp;
>      unsigned int subcode;
>  
>      if ((dc->tb_flags & MSR_EE_FLAG)
> @@ -636,13 +596,11 @@ static void dec_mul(DisasContext *dc)
>      }
>  
>      subcode = dc->imm & 3;
> -    d[0] = tcg_temp_new();
> -    d[1] = tcg_temp_new();
>  
>      if (dc->type_b) {
>          LOG_DIS("muli r%d r%d %x\n", dc->rd, dc->ra, dc->imm);
> -        t_gen_mulu(cpu_R[dc->rd], d[1], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
> -        goto done;
> +        tcg_gen_mul_tl(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc)));
> +        return;
>      }
>  
>      /* mulh, mulhsu and mulhu are not available if C_USE_HW_MUL is < 2.  */
> @@ -651,30 +609,29 @@ static void dec_mul(DisasContext *dc)
>          /* nop??? */
>      }
>  
> +    tmp = tcg_temp_new();
>      switch (subcode) {
>          case 0:
>              LOG_DIS("mul r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
> -            t_gen_mulu(cpu_R[dc->rd], d[1], cpu_R[dc->ra], cpu_R[dc->rb]);
> +            tcg_gen_mul_tl(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
>              break;
>          case 1:
>              LOG_DIS("mulh r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
> -            t_gen_muls(d[0], cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
> +            tcg_gen_muls2_tl(tmp, cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
>              break;
>          case 2:
>              LOG_DIS("mulhsu r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
> -            t_gen_muls(d[0], cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
> +            tcg_gen_mulsu2_tl(tmp, cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
>              break;
>          case 3:
>              LOG_DIS("mulhu r%d r%d r%d\n", dc->rd, dc->ra, dc->rb);
> -            t_gen_mulu(d[0], cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
> +            tcg_gen_mulu2_tl(tmp, cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
>              break;
>          default:
>              cpu_abort(CPU(dc->cpu), "unknown MUL insn %x\n", subcode);
>              break;
>      }
> -done:
> -    tcg_temp_free(d[0]);
> -    tcg_temp_free(d[1]);
> +    tcg_temp_free(tmp);
>  }
>  
>  /* Div unit.  */
> -- 
> 2.5.5
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-09-28  7:36 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-27 21:23 [Qemu-devel] [PATCH 0/2] tcg: Add tcg_gen_mulsu2_* Richard Henderson
2016-09-27 21:23 ` [Qemu-devel] [PATCH 1/2] tcg: Add tcg_gen_mulsu2_{i32,i64,tl} Richard Henderson
2016-09-27 21:23 ` [Qemu-devel] [PATCH 2/2] target-microblaze: Cleanup dec_mul Richard Henderson
2016-09-28  7:36   ` Edgar E. Iglesias

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.