All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH] target-ppc: optimize cmp translation
@ 2017-12-17  5:49 Paolo Bonzini
  2017-12-18  2:39 ` David Gibson
  2017-12-18 19:01 ` Richard Henderson
  0 siblings, 2 replies; 3+ messages in thread
From: Paolo Bonzini @ 2017-12-17  5:49 UTC (permalink / raw)
  To: qemu-devel; +Cc: dgibson, qemu-ppc

We know that only one bit (in addition to SO) is going to be set in
the condition register, so do two movconds instead of three setconds,
three shifts and two ORs.

For ppc64-linux-user, the code size reduction is around 5% and the
performance improvement slightly less than 10%.  For softmmu, the
improvement is around 5%.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/ppc/translate.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 998fbed848..0e9e6823a3 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -605,27 +605,22 @@ static opc_handler_t invalid_handler = {
 static inline void gen_op_cmp(TCGv arg0, TCGv arg1, int s, int crf)
 {
     TCGv t0 = tcg_temp_new();
-    TCGv_i32 t1 = tcg_temp_new_i32();
-
-    tcg_gen_trunc_tl_i32(cpu_crf[crf], cpu_so);
-
-    tcg_gen_setcond_tl((s ? TCG_COND_LT: TCG_COND_LTU), t0, arg0, arg1);
-    tcg_gen_trunc_tl_i32(t1, t0);
-    tcg_gen_shli_i32(t1, t1, CRF_LT_BIT);
-    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
+    TCGv t1 = tcg_temp_new();
+    TCGv_i32 t = tcg_temp_new_i32();
 
-    tcg_gen_setcond_tl((s ? TCG_COND_GT: TCG_COND_GTU), t0, arg0, arg1);
-    tcg_gen_trunc_tl_i32(t1, t0);
-    tcg_gen_shli_i32(t1, t1, CRF_GT_BIT);
-    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
+    tcg_gen_movi_tl(t0, CRF_EQ);
+    tcg_gen_movi_tl(t1, CRF_LT);
+    tcg_gen_movcond_tl((s ? TCG_COND_LT : TCG_COND_LTU), t0, arg0, arg1, t1, t0);
+    tcg_gen_movi_tl(t1, CRF_GT);
+    tcg_gen_movcond_tl((s ? TCG_COND_GT : TCG_COND_GTU), t0, arg0, arg1, t1, t0);
 
-    tcg_gen_setcond_tl(TCG_COND_EQ, t0, arg0, arg1);
-    tcg_gen_trunc_tl_i32(t1, t0);
-    tcg_gen_shli_i32(t1, t1, CRF_EQ_BIT);
-    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
+    tcg_gen_trunc_tl_i32(t, t0);
+    tcg_gen_trunc_tl_i32(cpu_crf[crf], cpu_so);
+    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t);
 
     tcg_temp_free(t0);
-    tcg_temp_free_i32(t1);
+    tcg_temp_free(t1);
+    tcg_temp_free_i32(t);
 }
 
 static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf)
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [Qemu-devel] [PATCH] target-ppc: optimize cmp translation
  2017-12-17  5:49 [Qemu-devel] [PATCH] target-ppc: optimize cmp translation Paolo Bonzini
@ 2017-12-18  2:39 ` David Gibson
  2017-12-18 19:01 ` Richard Henderson
  1 sibling, 0 replies; 3+ messages in thread
From: David Gibson @ 2017-12-18  2:39 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel, dgibson, qemu-ppc

[-- Attachment #1: Type: text/plain, Size: 2634 bytes --]

On Sun, Dec 17, 2017 at 06:49:53AM +0100, Paolo Bonzini wrote:
> We know that only one bit (in addition to SO) is going to be set in
> the condition register, so do two movconds instead of three setconds,
> three shifts and two ORs.
> 
> For ppc64-linux-user, the code size reduction is around 5% and the
> performance improvement slightly less than 10%.  For softmmu, the
> improvement is around 5%.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Applied to ppc-for-2.12, thanks.


> ---
>  target/ppc/translate.c | 29 ++++++++++++-----------------
>  1 file changed, 12 insertions(+), 17 deletions(-)
> 
> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> index 998fbed848..0e9e6823a3 100644
> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -605,27 +605,22 @@ static opc_handler_t invalid_handler = {
>  static inline void gen_op_cmp(TCGv arg0, TCGv arg1, int s, int crf)
>  {
>      TCGv t0 = tcg_temp_new();
> -    TCGv_i32 t1 = tcg_temp_new_i32();
> -
> -    tcg_gen_trunc_tl_i32(cpu_crf[crf], cpu_so);
> -
> -    tcg_gen_setcond_tl((s ? TCG_COND_LT: TCG_COND_LTU), t0, arg0, arg1);
> -    tcg_gen_trunc_tl_i32(t1, t0);
> -    tcg_gen_shli_i32(t1, t1, CRF_LT_BIT);
> -    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
> +    TCGv t1 = tcg_temp_new();
> +    TCGv_i32 t = tcg_temp_new_i32();
>  
> -    tcg_gen_setcond_tl((s ? TCG_COND_GT: TCG_COND_GTU), t0, arg0, arg1);
> -    tcg_gen_trunc_tl_i32(t1, t0);
> -    tcg_gen_shli_i32(t1, t1, CRF_GT_BIT);
> -    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
> +    tcg_gen_movi_tl(t0, CRF_EQ);
> +    tcg_gen_movi_tl(t1, CRF_LT);
> +    tcg_gen_movcond_tl((s ? TCG_COND_LT : TCG_COND_LTU), t0, arg0, arg1, t1, t0);
> +    tcg_gen_movi_tl(t1, CRF_GT);
> +    tcg_gen_movcond_tl((s ? TCG_COND_GT : TCG_COND_GTU), t0, arg0, arg1, t1, t0);
>  
> -    tcg_gen_setcond_tl(TCG_COND_EQ, t0, arg0, arg1);
> -    tcg_gen_trunc_tl_i32(t1, t0);
> -    tcg_gen_shli_i32(t1, t1, CRF_EQ_BIT);
> -    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
> +    tcg_gen_trunc_tl_i32(t, t0);
> +    tcg_gen_trunc_tl_i32(cpu_crf[crf], cpu_so);
> +    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t);
>  
>      tcg_temp_free(t0);
> -    tcg_temp_free_i32(t1);
> +    tcg_temp_free(t1);
> +    tcg_temp_free_i32(t);
>  }
>  
>  static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf)

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Qemu-devel] [PATCH] target-ppc: optimize cmp translation
  2017-12-17  5:49 [Qemu-devel] [PATCH] target-ppc: optimize cmp translation Paolo Bonzini
  2017-12-18  2:39 ` David Gibson
@ 2017-12-18 19:01 ` Richard Henderson
  1 sibling, 0 replies; 3+ messages in thread
From: Richard Henderson @ 2017-12-18 19:01 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: dgibson, qemu-ppc

On 12/16/2017 09:49 PM, Paolo Bonzini wrote:
> We know that only one bit (in addition to SO) is going to be set in
> the condition register, so do two movconds instead of three setconds,
> three shifts and two ORs.
> 
> For ppc64-linux-user, the code size reduction is around 5% and the
> performance improvement slightly less than 10%.  For softmmu, the
> improvement is around 5%.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  target/ppc/translate.c | 29 ++++++++++++-----------------
>  1 file changed, 12 insertions(+), 17 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-12-18 19:01 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-17  5:49 [Qemu-devel] [PATCH] target-ppc: optimize cmp translation Paolo Bonzini
2017-12-18  2:39 ` David Gibson
2017-12-18 19:01 ` Richard Henderson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.