All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 1/7] tcg: Generic support for conditional set and conditional move.
  2009-12-17  1:19 [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes Richard Henderson
@ 2009-12-16  0:34 ` Richard Henderson
  2009-12-16  0:35 ` [Qemu-devel] [PATCH 2/7] tcg-amd64: Implement setcond and movcond Richard Henderson
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 21+ messages in thread
From: Richard Henderson @ 2009-12-16  0:34 UTC (permalink / raw)
  To: qemu-devel

Defines setcond and movcond for implementing conditional moves at
the tcg opcode level.  64-bit-on-32-bit is expanded via a setcond2
primitive plus other operations.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/README    |   16 ++++++++++-
 tcg/tcg-op.h  |   87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/tcg-opc.h |    5 +++
 tcg/tcg.c     |   23 +++++++++++----
 tcg/tcg.h     |    5 +++
 5 files changed, 129 insertions(+), 7 deletions(-)

diff --git a/tcg/README b/tcg/README
index e672258..f9efa4d 100644
--- a/tcg/README
+++ b/tcg/README
@@ -282,6 +282,20 @@ order bytes must be set to zero.
 Indicate that the value of t0 won't be used later. It is useful to
 force dead code elimination.
 
+********* Conditional moves
+
+* setcond_i32/i64 cond, t0, t1, t2 
+
+t0 = (t1 cond t2)
+
+Set t0 to 1 if (t1 cond t2) is true, otherwise set to 0.
+
+* movcond_i32/i64 cond, t0, c1, c2, ot, of
+
+t0 = (c1 cond c2 ? ot : of)
+
+Set t0 to ot if (c1 cond c2) is true, otherwise set to of.
+
 ********* Type conversions
 
 * ext_i32_i64 t0, t1
@@ -375,7 +389,7 @@ The target word size (TCG_TARGET_REG_BITS) is expected to be 32 bit or
 
 On a 32 bit target, all 64 bit operations are converted to 32 bits. A
 few specific operations must be implemented to allow it (see add2_i32,
-sub2_i32, brcond2_i32).
+sub2_i32, brcond2_i32, setcond2_i32).
 
 Floating point operations are not supported in this version. A
 previous incarnation of the code generator had full support of them,
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index faf2e8b..4d0fec0 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -280,6 +280,32 @@ static inline void tcg_gen_op6_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
     *gen_opparam_ptr++ = GET_TCGV_I64(arg6);
 }
 
+static inline void tcg_gen_op6i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
+                                    TCGv_i32 arg3, TCGv_i32 arg4,
+                                    TCGv_i32 arg5, TCGArg arg6)
+{
+    *gen_opc_ptr++ = opc;
+    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+    *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+    *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+    *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+    *gen_opparam_ptr++ = GET_TCGV_I32(arg5);
+    *gen_opparam_ptr++ = arg6;
+}
+
+static inline void tcg_gen_op6i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
+                                    TCGv_i64 arg3, TCGv_i64 arg4,
+                                    TCGv_i64 arg5, TCGArg arg6)
+{
+    *gen_opc_ptr++ = opc;
+    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+    *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+    *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+    *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+    *gen_opparam_ptr++ = GET_TCGV_I64(arg5);
+    *gen_opparam_ptr++ = arg6;
+}
+
 static inline void tcg_gen_op6ii_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                      TCGv_i32 arg3, TCGv_i32 arg4, TCGArg arg5,
                                      TCGArg arg6)
@@ -1795,6 +1821,67 @@ static inline void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
     }
 }
 
+static inline void tcg_gen_setcond_i32(int cond, TCGv_i32 ret,
+                                       TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+}
+
+static inline void tcg_gen_setcond_i64(int cond, TCGv_i64 ret,
+                                       TCGv_i64 arg1, TCGv_i64 arg2)
+{
+#if TCG_TARGET_REG_BITS == 64
+    tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+#else
+    tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
+		     TCGV_LOW(arg1), TCGV_HIGH(arg1),
+		     TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+#endif
+}
+
+static inline void tcg_gen_movcond_i32(int cond, TCGv_i32 ret,
+                                       TCGv_i32 cmp1, TCGv_i32 cmp2,
+                                       TCGv_i32 op_t, TCGv_i32 op_f)
+{
+    if (TCGV_EQUAL_I32(op_t, op_f)) {
+        tcg_gen_mov_i32(ret, op_t);
+        return;
+    }
+    tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, cmp1, cmp2, op_t, op_f, cond);
+}
+
+static inline void tcg_gen_movcond_i64(int cond, TCGv_i64 ret,
+                                       TCGv_i64 cmp1, TCGv_i64 cmp2,
+                                       TCGv_i64 op_t, TCGv_i64 op_f)
+{
+    if (TCGV_EQUAL_I64(op_t, op_f)) {
+        tcg_gen_mov_i64(ret, op_t);
+        return;
+    }
+#if TCG_TARGET_REG_BITS == 64
+    tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, cmp1, cmp2, op_t, op_f, cond);
+#else
+    {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        TCGv_i32 zero = tcg_const_i32(0);
+
+        tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
+                         TCGV_LOW(cmp1), TCGV_HIGH(cmp1),
+                         TCGV_LOW(cmp2), TCGV_HIGH(cmp2), cond);
+
+        /* ??? We could perhaps conditionally define a movcond2_i32.  */
+        tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, zero,
+                            TCGV_LOW(op_t), TCGV_LOW(op_f));
+        tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, zero,
+                            TCGV_HIGH(op_t), TCGV_HIGH(op_f));
+
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(zero);
+    }
+#endif
+}
+
 /***************************************/
 /* QEMU specific operations. Their type depend on the QEMU CPU
    type. */
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index b7f3fd7..086968c 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -42,6 +42,8 @@ DEF2(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 
 DEF2(mov_i32, 1, 1, 0, 0)
 DEF2(movi_i32, 1, 0, 1, 0)
+DEF2(setcond_i32, 1, 2, 1, 0)
+DEF2(movcond_i32, 1, 4, 1, 0)
 /* load/store */
 DEF2(ld8u_i32, 1, 1, 1, 0)
 DEF2(ld8s_i32, 1, 1, 1, 0)
@@ -82,6 +84,7 @@ DEF2(add2_i32, 2, 4, 0, 0)
 DEF2(sub2_i32, 2, 4, 0, 0)
 DEF2(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 DEF2(mulu2_i32, 2, 2, 0, 0)
+DEF2(setcond2_i32, 1, 4, 1, 0)
 #endif
 #ifdef TCG_TARGET_HAS_ext8s_i32
 DEF2(ext8s_i32, 1, 1, 0, 0)
@@ -111,6 +114,8 @@ DEF2(neg_i32, 1, 1, 0, 0)
 #if TCG_TARGET_REG_BITS == 64
 DEF2(mov_i64, 1, 1, 0, 0)
 DEF2(movi_i64, 1, 0, 1, 0)
+DEF2(setcond_i64, 1, 2, 1, 0)
+DEF2(movcond_i64, 1, 4, 1, 0)
 /* load/store */
 DEF2(ld8u_i64, 1, 1, 1, 0)
 DEF2(ld8s_i64, 1, 1, 1, 0)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 3c0e296..4fae82a 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -670,6 +670,7 @@ void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
 }
 #endif
 
+
 static void tcg_reg_alloc_start(TCGContext *s)
 {
     int i;
@@ -888,21 +889,31 @@ void tcg_dump_ops(TCGContext *s, FILE *outfile)
                 fprintf(outfile, "%s",
                         tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++]));
             }
-            if (c == INDEX_op_brcond_i32
+	    switch (c) {
+	    case INDEX_op_brcond_i32:
+#if TCG_TARGET_REG_BITS == 32
+	    case INDEX_op_brcond2_i32:
+#elif TCG_TARGET_REG_BITS == 64
+	    case INDEX_op_brcond_i64:
+#endif
+	    case INDEX_op_setcond_i32:
+	    case INDEX_op_movcond_i32:
 #if TCG_TARGET_REG_BITS == 32
-                || c == INDEX_op_brcond2_i32
+	    case INDEX_op_setcond2_i32:
 #elif TCG_TARGET_REG_BITS == 64
-                || c == INDEX_op_brcond_i64
+	    case INDEX_op_setcond_i64:
+	    case INDEX_op_movcond_i64:
 #endif
-                ) {
                 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]])
                     fprintf(outfile, ",%s", cond_name[args[k++]]);
                 else
                     fprintf(outfile, ",$0x%" TCG_PRIlx, args[k++]);
                 i = 1;
-            }
-            else
+		break;
+	    default:
                 i = 0;
+                break;
+            }
             for(; i < nb_cargs; i++) {
                 if (k != 0)
                     fprintf(outfile, ",");
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 9824493..376d6af 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -205,6 +205,11 @@ typedef enum {
     TCG_COND_GTU,
 } TCGCond;
 
+static inline TCGCond tcg_invert_cond(TCGCond c)
+{
+    return (TCGCond)(c ^ 1);
+}
+
 #define TEMP_VAL_DEAD  0
 #define TEMP_VAL_REG   1
 #define TEMP_VAL_MEM   2
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 2/7] tcg-amd64: Implement setcond and movcond.
  2009-12-17  1:19 [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes Richard Henderson
  2009-12-16  0:34 ` [Qemu-devel] [PATCH 1/7] tcg: Generic support for conditional set and conditional move Richard Henderson
@ 2009-12-16  0:35 ` Richard Henderson
  2009-12-16  0:36 ` [Qemu-devel] [PATCH 3/7] target-alpha: Use setcond/movcond in integer compares and cmoves Richard Henderson
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 21+ messages in thread
From: Richard Henderson @ 2009-12-16  0:35 UTC (permalink / raw)
  To: qemu-devel

Implement conditional moves in the x86_64 backend.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/x86_64/tcg-target.c |   65 ++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/tcg/x86_64/tcg-target.c b/tcg/x86_64/tcg-target.c
index 2339091..e51d099 100644
--- a/tcg/x86_64/tcg-target.c
+++ b/tcg/x86_64/tcg-target.c
@@ -491,9 +491,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
     }
 }
 
-static void tcg_out_brcond(TCGContext *s, int cond, 
-                           TCGArg arg1, TCGArg arg2, int const_arg2,
-                           int label_index, int rexw)
+static void tcg_out_cond(TCGContext *s, int cond, TCGArg arg1,
+                         TCGArg arg2, int const_arg2, int rexw)
 {
     if (const_arg2) {
         if (arg2 == 0) {
@@ -508,9 +507,45 @@ static void tcg_out_brcond(TCGContext *s, int cond,
     } else {
         tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1);
     }
+}
+
+static void tcg_out_brcond(TCGContext *s, int cond, 
+                           TCGArg arg1, TCGArg arg2, int const_arg2,
+                           int label_index, int rexw)
+{
+    tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw);
     tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
 }
 
+static void tcg_out_setcond(TCGContext *s, int cond, TCGArg arg0,
+                            TCGArg arg1, TCGArg arg2, int const_arg2, int rexw)
+{
+    int use_xor = (arg0 != arg1 && (const_arg2 || arg0 != arg2));
+
+    if (use_xor)
+        tcg_out_movi(s, TCG_TYPE_I32, arg0, 0);
+    tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw);
+    tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT | P_REXB, 0, arg0);
+    if (!use_xor)
+        tgen_arithi32(s, ARITH_AND, arg0, 0xff);
+}
+
+static void tcg_out_movcond(TCGContext *s, int cond, TCGArg arg0,
+                            TCGArg arg1, TCGArg arg2, int const_arg2,
+			    TCGArg arg3, TCGArg arg4, int rexw)
+{
+    if (arg0 == arg3) {
+        cond = tcg_invert_cond(cond);
+        arg3 = arg4;
+        arg4 = arg0;
+    }
+
+    tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw);
+    if (arg0 != arg4)
+        tcg_out_mov(s, arg0, arg4);
+    tcg_out_modrm(s, 0x40 | tcg_cond_to_jcc[cond] | P_EXT | rexw, arg0, arg3);
+}
+
 #if defined(CONFIG_SOFTMMU)
 
 #include "../../softmmu_defs.h"
@@ -1197,6 +1232,24 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
         tcg_out_modrm(s, 0x8b, args[0], args[1]);
         break;
 
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2],
+                        const_args[2], 0);
+        break;
+    case INDEX_op_setcond_i64:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2],
+                        const_args[2], P_REXW);
+        break;
+
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+                        const_args[2], args[3], args[4], 0);
+        break;
+    case INDEX_op_movcond_i64:
+        tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+                        const_args[2], args[3], args[4], P_REXW);
+        break;
+
     case INDEX_op_qemu_ld8u:
         tcg_out_qemu_ld(s, args, 0);
         break;
@@ -1376,6 +1429,12 @@ static const TCGTargetOpDef x86_64_op_defs[] = {
     { INDEX_op_ext16u_i64, { "r", "r"} },
     { INDEX_op_ext32u_i64, { "r", "r"} },
 
+    { INDEX_op_setcond_i32, { "r", "r", "re" } },
+    { INDEX_op_setcond_i64, { "r", "r", "re" } },
+
+    { INDEX_op_movcond_i32, { "r", "r", "re", "r", "r" } },
+    { INDEX_op_movcond_i64, { "r", "r", "re", "r", "r" } },
+
     { INDEX_op_qemu_ld8u, { "r", "L" } },
     { INDEX_op_qemu_ld8s, { "r", "L" } },
     { INDEX_op_qemu_ld16u, { "r", "L" } },
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 3/7] target-alpha: Use setcond/movcond in integer compares and cmoves.
  2009-12-17  1:19 [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes Richard Henderson
  2009-12-16  0:34 ` [Qemu-devel] [PATCH 1/7] tcg: Generic support for conditional set and conditional move Richard Henderson
  2009-12-16  0:35 ` [Qemu-devel] [PATCH 2/7] tcg-amd64: Implement setcond and movcond Richard Henderson
@ 2009-12-16  0:36 ` Richard Henderson
  2009-12-16 23:17 ` [Qemu-devel] [PATCH 4/7] tcg-i386: Implement setcond, movcond, setcond2 Richard Henderson
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 21+ messages in thread
From: Richard Henderson @ 2009-12-16  0:36 UTC (permalink / raw)
  To: qemu-devel

Limited usage of setcond/movcond to enable testing in the code generator.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-alpha/translate.c |   66 +++++++++++++++++++++------------------------
 1 files changed, 31 insertions(+), 35 deletions(-)

diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 7b6ff2a..f51d4ef 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -391,33 +391,33 @@ static void gen_fbcond(DisasContext *ctx, TCGCond cond, int ra, int32_t disp)
 static inline void gen_cmov(TCGCond inv_cond, int ra, int rb, int rc,
                             int islit, uint8_t lit, int mask)
 {
-    int l1;
+    TCGv va, vb, zero;
 
     if (unlikely(rc == 31))
         return;
 
-    l1 = gen_new_label();
-
-    if (ra != 31) {
-        if (mask) {
-            TCGv tmp = tcg_temp_new();
-            tcg_gen_andi_i64(tmp, cpu_ir[ra], 1);
-            tcg_gen_brcondi_i64(inv_cond, tmp, 0, l1);
-            tcg_temp_free(tmp);
-        } else
-            tcg_gen_brcondi_i64(inv_cond, cpu_ir[ra], 0, l1);
+    zero = tcg_const_i64(0);
+    if (ra == 31) {
+        va = zero;
+    } else if (mask) {
+        va = tcg_temp_new();
+        tcg_gen_andi_i64(va, cpu_ir[ra], 1);
     } else {
-        /* Very uncommon case - Do not bother to optimize.  */
-        TCGv tmp = tcg_const_i64(0);
-        tcg_gen_brcondi_i64(inv_cond, tmp, 0, l1);
-        tcg_temp_free(tmp);
+        va = cpu_ir[ra];
     }
 
     if (islit)
-        tcg_gen_movi_i64(cpu_ir[rc], lit);
+        vb = tcg_const_i64(lit);
     else
-        tcg_gen_mov_i64(cpu_ir[rc], cpu_ir[rb]);
-    gen_set_label(l1);
+        vb = cpu_ir[rb];
+
+    tcg_gen_movcond_i64(inv_cond, cpu_ir[rc], va, zero, cpu_ir[rc], vb);
+
+    tcg_temp_free(zero);
+    if (mask && ra != 31)
+        tcg_temp_free(va);
+    if (islit)
+        tcg_temp_free(vb);
 }
 
 static void gen_fcmov(TCGCond inv_cond, int ra, int rb, int rc)
@@ -873,30 +873,26 @@ MVIOP2(unpkbw)
 static inline void gen_cmp(TCGCond cond, int ra, int rb, int rc, int islit,
                            uint8_t lit)
 {
-    int l1, l2;
-    TCGv tmp;
+    TCGv va, vb;
 
     if (unlikely(rc == 31))
         return;
 
-    l1 = gen_new_label();
-    l2 = gen_new_label();
-
-    if (ra != 31) {
-        tmp = tcg_temp_new();
-        tcg_gen_mov_i64(tmp, cpu_ir[ra]);
-    } else
-        tmp = tcg_const_i64(0);
+    if (ra == 31)
+        va = tcg_const_i64(0);
+    else
+        va = cpu_ir[ra];
     if (islit)
-        tcg_gen_brcondi_i64(cond, tmp, lit, l1);
+        vb = tcg_const_i64(lit);
     else
-        tcg_gen_brcond_i64(cond, tmp, cpu_ir[rb], l1);
+        vb = cpu_ir[rb];
 
-    tcg_gen_movi_i64(cpu_ir[rc], 0);
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_movi_i64(cpu_ir[rc], 1);
-    gen_set_label(l2);
+    tcg_gen_setcond_i64(cond, cpu_ir[rc], va, vb);
+
+    if (ra == 31)
+        tcg_temp_free(va);
+    if (islit)
+        tcg_temp_free(vb);
 }
 
 static inline int translate_one(DisasContext *ctx, uint32_t insn)
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 4/7] tcg-i386: Implement setcond, movcond, setcond2.
  2009-12-17  1:19 [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes Richard Henderson
                   ` (2 preceding siblings ...)
  2009-12-16  0:36 ` [Qemu-devel] [PATCH 3/7] target-alpha: Use setcond/movcond in integer compares and cmoves Richard Henderson
@ 2009-12-16 23:17 ` Richard Henderson
  2009-12-16 23:26 ` [Qemu-devel] [PATCH 5/7] tcg-sparc: Implement setcond, movcond, setcond2, brcond2 Richard Henderson
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 21+ messages in thread
From: Richard Henderson @ 2009-12-16 23:17 UTC (permalink / raw)
  To: qemu-devel

An initial cut at conditional moves for the i386 backend.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 elf.h                 |    2 +
 tcg/i386/tcg-target.c |  280 ++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 233 insertions(+), 49 deletions(-)

diff --git a/elf.h b/elf.h
index 11674d7..c84c8ab 100644
--- a/elf.h
+++ b/elf.h
@@ -243,6 +243,8 @@ typedef struct {
 #define R_386_GOTOFF	9
 #define R_386_GOTPC	10
 #define R_386_NUM	11
+/* Not a dynamic reloc, so not included in R_386_NUM.  Used in TCG.  */
+#define R_386_PC8	23
 
 #define R_MIPS_NONE		0
 #define R_MIPS_16		1
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 972b102..90dbbe9 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -61,6 +61,9 @@ static void patch_reloc(uint8_t *code_ptr, int type,
     case R_386_PC32:
         *(uint32_t *)code_ptr = value - (long)code_ptr;
         break;
+    case R_386_PC8:
+        *(uint8_t *)code_ptr = value - (long)code_ptr;
+        break;
     default:
         tcg_abort();
     }
@@ -305,7 +308,8 @@ static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
         tgen_arithi(s, ARITH_ADD, reg, val, 0);
 }
 
-static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
+/* Use SMALL != 0 to force a short forward branch.  */
+static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
 {
     int32_t val, val1;
     TCGLabel *l = &s->labels[label_index];
@@ -320,6 +324,7 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
                 tcg_out8(s, 0x70 + opc);
             tcg_out8(s, val1);
         } else {
+            assert (!small);
             if (opc == -1) {
                 tcg_out8(s, 0xe9);
                 tcg_out32(s, val - 5);
@@ -329,6 +334,15 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
                 tcg_out32(s, val - 6);
             }
         }
+    } else if (small) {
+        if (opc == -1) {
+            tcg_out8(s, 0xeb);
+        } else {
+            tcg_out8(s, 0x0f);
+            tcg_out8(s, 0x70 + opc);
+        }
+        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
+        s->code_ptr += 1;
     } else {
         if (opc == -1) {
             tcg_out8(s, 0xe9);
@@ -341,9 +355,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
     }
 }
 
-static void tcg_out_brcond(TCGContext *s, int cond, 
-                           TCGArg arg1, TCGArg arg2, int const_arg2,
-                           int label_index)
+static void tcg_out_cond(TCGContext *s, int cond,
+			 TCGArg arg1, TCGArg arg2, int const_arg2)
 {
     if (const_arg2) {
         if (arg2 == 0) {
@@ -355,71 +368,225 @@ static void tcg_out_brcond(TCGContext *s, int cond,
     } else {
         tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
     }
-    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
+}
+
+static void tcg_out_brcond(TCGContext *s, int cond, 
+                           TCGArg arg1, TCGArg arg2, int const_arg2,
+                           int label_index, int small)
+{
+    tcg_out_cond(s, cond, arg1, arg2, const_arg2);
+    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
 }
 
 /* XXX: we implement it at the target level to avoid having to
    handle cross basic blocks temporaries */
-static void tcg_out_brcond2(TCGContext *s,
-                            const TCGArg *args, const int *const_args)
+static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
+                            const int *const_args, int small)
 {
-    int label_next;
-    label_next = gen_new_label();
-    switch(args[4]) {
+    int label_next = gen_new_label();
+    int label_dest = args[5];
+    int cond = args[4], c1, c2, c3;
+
+    switch (cond) {
     case TCG_COND_EQ:
-        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], label_next);
-        tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3], args[5]);
+        c1 = -1, c2 = TCG_COND_NE, c3 = TCG_COND_EQ;
         break;
     case TCG_COND_NE:
-        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], args[5]);
-        tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3], args[5]);
+        c1 = TCG_COND_NE, c2 = -1, c3 = TCG_COND_NE;
         break;
     case TCG_COND_LT:
-        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
-        break;
-    case TCG_COND_LE:
-        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
-        break;
-    case TCG_COND_GT:
-        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
-        break;
-    case TCG_COND_GE:
-        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
-        break;
     case TCG_COND_LTU:
-        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
+        c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_LTU;
         break;
+    case TCG_COND_LE:
     case TCG_COND_LEU:
-        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
+        c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_LEU;
         break;
+    case TCG_COND_GT:
     case TCG_COND_GTU:
-        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
+        c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_GTU;
         break;
+    case TCG_COND_GE:
     case TCG_COND_GEU:
-        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
+        c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_GEU;
         break;
     default:
-        tcg_abort();
+        tcg_abort ();
+    }
+
+    tcg_out_cond(s, cond, args[1], args[3], const_args[3]);
+    if (c1 != -1) {
+        tcg_out_jxx(s, tcg_cond_to_jcc[c1], label_dest, small);
+    }
+    if (c2 != -1) {
+        tcg_out_jxx(s, tcg_cond_to_jcc[c2], label_next, 1);
     }
+    tcg_out_brcond(s, c3, args[0], args[2], const_args[2], label_dest, small);
+
     tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
 }
 
+static void tcg_out_setcond(TCGContext *s, int cond, TCGArg arg0,
+                            TCGArg arg1, TCGArg arg2, int const_arg2)
+{
+    int use_xor = (arg0 != arg1 && (const_arg2 || arg0 != arg2));
+
+    if (use_xor)
+        tcg_out_movi(s, TCG_TYPE_I32, arg0, 0);
+    tcg_out_cond(s, cond, arg1, arg2, const_arg2);
+    tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT, 0, arg0);
+    if (!use_xor)
+        tgen_arithi(s, ARITH_AND, arg0, 0xff, 0);
+}
+
+static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
+                             const int *const_args)
+{
+    int overlapl, overlaph;
+    TCGArg new_args[6];
+    int label_true, label_over;
+
+    overlapl = (args[0] == args[1] || (!const_args[3] && args[0] == args[3]));
+    overlaph = (args[0] == args[2] || (!const_args[4] && args[0] == args[4]));
+    memcpy(new_args, args+1, 5*sizeof(TCGArg));
+
+    if (!overlapl && !overlaph) {
+        /* ??? For EQ and NE, and output register in 'q', we could
+           implement this as cmp lows; setb %al; cmp highs; setb %ah;
+           andb %ah, %al; movzbl %al, %eax it's not clear it's worth
+           it though.  */
+
+        /* When possible, clear the destination first and increment in
+           the true case.  This results in smaller code than the
+           general case below.  */
+        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+
+        label_over = gen_new_label();
+        new_args[5] = label_over;
+        tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
+        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+    } else {
+        label_true = gen_new_label();
+        label_over = gen_new_label();
+
+        new_args[5] = label_true;
+        tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+        tcg_out_jxx(s, JCC_JMP, label_over, 1);
+        tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);
+
+        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
+        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+    }
+}
+
+static inline int have_cmov(void)
+{
+#ifdef __i686__
+    /* Compiler options say that cmov is available.  */
+    return 1;
+#else
+    /* ??? Use cpuid or something and figure out what's running.  */
+    return 0;
+#endif
+}
+
+static void tcg_out_movcond(TCGContext *s, const TCGArg *args,
+                            const int *const_args)
+{
+    int vtc, vfc, cond, use_cmov = 0, do_swap = 0;
+    TCGArg d, vt, vf;
+
+    d = args[0];
+    vt = args[3];
+    vf = args[4];
+    vtc = const_args[3];
+    vfc = const_args[4];
+
+    /* ??? The jcc code path below assumes that one mov insn must be skipped.
+       Rather than complicate the code below, make sure to simplify the
+       conditional move here.  */
+    if (vtc == vfc && vt == vf) {
+        if (vtc)
+            tcg_out_movi(s, TCG_TYPE_I32, d, vt);
+        else
+            tcg_out_mov(s, d, vt);
+        return;
+    }
+
+    cond = args[5];
+
+    /* If both arguments are constants, we *could* do all the funny bits that
+       gcc does with sbc, masks, etc.  There's likely no point.  Just use the
+       jcc version in this case.  We also have to be careful about clobbering
+       inputs when trying to move constants into position.  */
+
+    if (have_cmov()) {
+        use_cmov = 1;
+        if (vtc) {
+            if (vfc || d == vf)
+                use_cmov = 0;
+            else
+                do_swap = 1;
+        } else if (d == vt) {
+            if (vfc)
+                use_cmov = 0;
+            else
+                do_swap = 1;
+        }
+    }
+
+    if (!use_cmov) {
+        /* We're going to follow the lead of cmov and set D=VF first,
+           which means inverting the condition upon which we jump.  */
+        cond = tcg_invert_cond(cond);
+
+        /* Don't allow the move we jump over to be a nop.  */
+        do_swap = (!vtc && d == vt);
+    }
+
+    if (do_swap) {
+        TCGArg t;
+        cond = tcg_invert_cond(cond);
+        t = vf, vf = vt, vt = t;
+        t = vfc, vfc = vtc, vtc = t;
+    }
+
+    /* If possible, set D=0 before the compare, so that we can use XOR.  */
+    if (vfc && vf == 0 && d != args[1] && (const_args[2] || d != args[2])) {
+        tcg_out_movi(s, TCG_TYPE_I32, d, vf);
+        vf = d, vfc = 0;
+    }
+
+    tcg_out_cond(s, cond, args[1], args[2], const_args[2]);
+            
+    if (vfc) {
+        /* Force the use of "mov $0, d" to avoid clobbering flags.  */
+        tcg_out8(s, 0xb8 + d);
+        tcg_out32(s, vf);
+    } else {
+        tcg_out_mov(s, d, vf);
+    }
+
+    if (use_cmov) {
+        assert (!vtc);
+        tcg_out_modrm(s, 0x40 | tcg_cond_to_jcc[cond] | P_EXT, d, vt);
+    } else {
+        int label_next = gen_new_label();
+
+        tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_next, 1);
+        if (vtc)
+            tcg_out_movi(s, TCG_TYPE_I32, d, vt);
+        else
+            tcg_out_mov(s, d, vt);
+
+        tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
+    }
+}
+
 #if defined(CONFIG_SOFTMMU)
 
 #include "../../softmmu_defs.h"
@@ -913,7 +1080,7 @@ static inline void tcg_out_op(TCGContext *s, int opc,
         }
         break;
     case INDEX_op_br:
-        tcg_out_jxx(s, JCC_JMP, args[0]);
+        tcg_out_jxx(s, JCC_JMP, args[0], 0);
         break;
     case INDEX_op_movi_i32:
         tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
@@ -1044,10 +1211,11 @@ static inline void tcg_out_op(TCGContext *s, int opc,
             tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
         break;
     case INDEX_op_brcond_i32:
-        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
+        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
+                       args[3], 0);
         break;
     case INDEX_op_brcond2_i32:
-        tcg_out_brcond2(s, args, const_args);
+        tcg_out_brcond2(s, args, const_args, 0);
         break;
 
     case INDEX_op_bswap16_i32:
@@ -1080,6 +1248,16 @@ static inline void tcg_out_op(TCGContext *s, int opc,
         tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
         break;
 
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]);
+        break;
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond(s, args, const_args);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2(s, args, const_args);
+        break;
+
     case INDEX_op_qemu_ld8u:
         tcg_out_qemu_ld(s, args, 0);
         break;
@@ -1168,6 +1346,10 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_ext8u_i32, { "r", "q"} },
     { INDEX_op_ext16u_i32, { "r", "r"} },
 
+    { INDEX_op_setcond_i32, { "q", "r", "ri" } },
+    { INDEX_op_movcond_i32, { "r", "r", "ri", "ri", "ri" } },
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
+
 #if TARGET_LONG_BITS == 32
     { INDEX_op_qemu_ld8u, { "r", "L" } },
     { INDEX_op_qemu_ld8s, { "r", "L" } },
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 5/7] tcg-sparc: Implement setcond, movcond, setcond2, brcond2.
  2009-12-17  1:19 [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes Richard Henderson
                   ` (3 preceding siblings ...)
  2009-12-16 23:17 ` [Qemu-devel] [PATCH 4/7] tcg-i386: Implement setcond, movcond, setcond2 Richard Henderson
@ 2009-12-16 23:26 ` Richard Henderson
  2009-12-19 10:31   ` Blue Swirl
  2009-12-16 23:28 ` [Qemu-devel] [PATCH 6/7] target-i386: Use setcond and movcond Richard Henderson
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 21+ messages in thread
From: Richard Henderson @ 2009-12-16 23:26 UTC (permalink / raw)
  To: qemu-devel

An initial cut at conditional moves for the sparc backend.

Untested, as I don't have sparc hardware and the build system
resists attempts at cross-compilation.

Note fixes to tcg_out_movi_imm32 (wrong check_fit_tl width),
use of TCG_TARGET_REG_BITS == 64 tests instead of explicitly
checking for __sparc_v9__ everywhere.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/sparc/tcg-target.c |  415 +++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 360 insertions(+), 55 deletions(-)

diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 23cd9cd..351683a 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -191,6 +191,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define INSN_RS2(x) (x)
 #define INSN_ASI(x) ((x) << 5)
 
+#define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff))
+#define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff))
 #define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff))
 #define INSN_OFF19(x) (((x) >> 2) & 0x07ffff)
 #define INSN_OFF22(x) (((x) >> 2) & 0x3fffff)
@@ -214,6 +216,20 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define COND_VC    0xf
 #define BA         (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2))
 
+#define BPCC_ICC   (0)
+#define BPCC_XCC   (1 << 21)
+
+#define MOVCC_ICC  (1 << 18)
+#define MOVCC_XCC  (1 << 18 | 1 << 12)
+
+#define MRCOND_E   0x1
+#define MRCOND_LE  0x2
+#define MRCOND_L   0x3
+#define MRCOND_NE  0x5
+#define MRCOND_G   0x6
+#define MRCOND_GE  0x7
+#define INSN_MRCOND(c) ((c) << 10)
+
 #define ARITH_ADD  (INSN_OP(2) | INSN_OP3(0x00))
 #define ARITH_AND  (INSN_OP(2) | INSN_OP3(0x01))
 #define ARITH_OR   (INSN_OP(2) | INSN_OP3(0x02))
@@ -228,7 +244,9 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define ARITH_SDIV (INSN_OP(2) | INSN_OP3(0x0f))
 #define ARITH_MULX (INSN_OP(2) | INSN_OP3(0x09))
 #define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d))
+#define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c))
 #define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d))
+#define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f))
 
 #define SHIFT_SLL  (INSN_OP(2) | INSN_OP3(0x25))
 #define SHIFT_SRL  (INSN_OP(2) | INSN_OP3(0x26))
@@ -287,7 +305,8 @@ static inline void tcg_out_arithi(TCGContext *s, int rd, int rs1,
 
 static inline void tcg_out_mov(TCGContext *s, int ret, int arg)
 {
-    tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
+    if (ret != arg)
+        tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
 }
 
 static inline void tcg_out_sethi(TCGContext *s, int ret, uint32_t arg)
@@ -302,7 +321,7 @@ static inline void tcg_out_movi_imm13(TCGContext *s, int ret, uint32_t arg)
 
 static inline void tcg_out_movi_imm32(TCGContext *s, int ret, uint32_t arg)
 {
-    if (check_fit_tl(arg, 12))
+    if (check_fit_tl(arg, 13))
         tcg_out_movi_imm13(s, ret, arg);
     else {
         tcg_out_sethi(s, ret, arg);
@@ -314,22 +333,20 @@ static inline void tcg_out_movi_imm32(TCGContext *s, int ret, uint32_t arg)
 static inline void tcg_out_movi(TCGContext *s, TCGType type,
                                 int ret, tcg_target_long arg)
 {
-#if defined(__sparc_v9__) && !defined(__sparc_v8plus__)
-    if (!check_fit_tl(arg, 32) && (arg & ~0xffffffffULL) != 0) {
-        tcg_out_movi_imm32(s, TCG_REG_I4, arg >> 32);
-        tcg_out_arithi(s, TCG_REG_I4, TCG_REG_I4, 32, SHIFT_SLLX);
+    if (type == TCG_TYPE_I32 || (arg & ~(tcg_target_long)0xffffffff))
         tcg_out_movi_imm32(s, ret, arg);
-        tcg_out_arith(s, ret, ret, TCG_REG_I4, ARITH_OR);
-    } else if (check_fit_tl(arg, 12))
-        tcg_out_movi_imm13(s, ret, arg);
-    else {
-        tcg_out_sethi(s, ret, arg);
-        if (arg & 0x3ff)
-            tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
+    else if (TCG_TARGET_REG_BITS == 64) {
+        if (check_fit_tl(arg, 32)) {
+            /* Sign extended 32-bit constants are formed with SETHI+XOR.  */
+            tcg_out_sethi(s, ret, ~arg);
+            tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
+        } else {
+            tcg_out_movi_imm32(s, TCG_REG_I4, arg >> 32);
+            tcg_out_arithi(s, TCG_REG_I4, TCG_REG_I4, 32, SHIFT_SLLX);
+            tcg_out_movi_imm32(s, ret, arg);
+            tcg_out_arith(s, ret, ret, TCG_REG_I4, ARITH_OR);
+        }
     }
-#else
-    tcg_out_movi_imm32(s, ret, arg);
-#endif
 }
 
 static inline void tcg_out_ld_raw(TCGContext *s, int ret,
@@ -345,16 +362,18 @@ static inline void tcg_out_ld_ptr(TCGContext *s, int ret,
 {
     if (!check_fit_tl(arg, 10))
         tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ffULL);
-#if defined(__sparc_v9__) && !defined(__sparc_v8plus__)
-    tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(ret) |
-              INSN_IMM13(arg & 0x3ff));
-#else
-    tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) |
-              INSN_IMM13(arg & 0x3ff));
-#endif
+
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(ret) |
+                  INSN_IMM13(arg & 0x3ff));
+    } else {
+        tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) |
+                  INSN_IMM13(arg & 0x3ff));
+    }
 }
 
-static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, int offset, int op)
+static inline void tcg_out_ldst(TCGContext *s, int ret, int addr,
+                                int offset, int op)
 {
     if (check_fit_tl(offset, 13))
         tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) |
@@ -435,16 +454,16 @@ static void tcg_out_branch_i32(TCGContext *s, int opc, int label_index)
     TCGLabel *l = &s->labels[label_index];
 
     if (l->has_value) {
-        val = l->u.value - (tcg_target_long)s->code_ptr;
-        tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2)
+        int32_t val = l->u.value - (tcg_target_long)s->code_ptr;
+        tcg_out32(s, (INSN_OP(0) | opc | INSN_OP2(0x2)
                       | INSN_OFF22(l->u.value - (unsigned long)s->code_ptr)));
     } else {
         tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP22, label_index, 0);
-        tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2) | 0));
+        tcg_out32(s, (INSN_OP(0) | opc | INSN_OP2(0x2) | 0));
     }
 }
 
-#if defined(__sparc_v9__) && !defined(__sparc_v8plus__)
+#if TCG_TARGET_REG_BITS == 64
 static void tcg_out_branch_i64(TCGContext *s, int opc, int label_index)
 {
     int32_t val;
@@ -452,13 +471,11 @@ static void tcg_out_branch_i64(TCGContext *s, int opc, int label_index)
 
     if (l->has_value) {
         val = l->u.value - (tcg_target_long)s->code_ptr;
-        tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) |
-                      (0x5 << 19) |
+        tcg_out32(s, (INSN_OP(0) | opc | INSN_OP2(0x1) | (0x5 << 19) |
                       INSN_OFF19(l->u.value - (unsigned long)s->code_ptr)));
     } else {
         tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label_index, 0);
-        tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) |
-                      (0x5 << 19) | 0));
+        tcg_out32(s, (INSN_OP(0) | opc | INSN_OP2(0x1) | (0x5 << 19) | 0));
     }
 }
 #endif
@@ -476,34 +493,280 @@ static const uint8_t tcg_cond_to_bcond[10] = {
     [TCG_COND_GTU] = COND_GU,
 };
 
+static void tcg_out_cmp(TCGContext *s, TCGArg c1, TCGArg c2, int c2const)
+{
+    if (c2const)
+        tcg_out_arithi(s, TCG_REG_G0, c1, c2, ARITH_SUBCC);
+    else
+        tcg_out_arith(s, TCG_REG_G0, c1, c2, ARITH_SUBCC);
+}
+
 static void tcg_out_brcond_i32(TCGContext *s, int cond,
                                TCGArg arg1, TCGArg arg2, int const_arg2,
                                int label_index)
 {
-    if (const_arg2 && arg2 == 0)
-        /* orcc %g0, r, %g0 */
-        tcg_out_arith(s, TCG_REG_G0, TCG_REG_G0, arg1, ARITH_ORCC);
-    else
-        /* subcc r1, r2, %g0 */
-        tcg_out_arith(s, TCG_REG_G0, arg1, arg2, ARITH_SUBCC);
-    tcg_out_branch_i32(s, tcg_cond_to_bcond[cond], label_index);
+    tcg_out_cmp(s, arg1, arg2, const_arg2);
+    tcg_out_branch_i32(s, INSN_COND(tcg_cond_to_bcond[cond], 0), label_index);
     tcg_out_nop(s);
 }
 
-#if defined(__sparc_v9__) && !defined(__sparc_v8plus__)
+#if TCG_TARGET_REG_BITS == 64
 static void tcg_out_brcond_i64(TCGContext *s, int cond,
                                TCGArg arg1, TCGArg arg2, int const_arg2,
                                int label_index)
 {
-    if (const_arg2 && arg2 == 0)
-        /* orcc %g0, r, %g0 */
-        tcg_out_arith(s, TCG_REG_G0, TCG_REG_G0, arg1, ARITH_ORCC);
-    else
-        /* subcc r1, r2, %g0 */
-        tcg_out_arith(s, TCG_REG_G0, arg1, arg2, ARITH_SUBCC);
-    tcg_out_branch_i64(s, tcg_cond_to_bcond[cond], label_index);
+    tcg_out_cmp(s, arg1, arg2, const_arg2);
+    tcg_out_branch_i64(s, INSN_COND(tcg_cond_to_bcond[cond], 0) | BPCC_XCC,
+                       label_index);
     tcg_out_nop(s);
 }
+#else
+static void tcg_out_brcond2_i32(TCGContext *s, int cond,
+                                TCGArg al, TCGArg ah,
+                                TCGArg bl, int blconst,
+                                TCGArg bh, int bhconst, int label_dest)
+{
+    int label_next = gen_new_label();
+    int c1, c2, c3;
+
+    /* ??? For v8plus, consider reserving two global registers so that we
+       can reconstruct the 64-bit values there and compare them directly.  */
+
+    switch (cond) {
+    case TCG_COND_EQ:
+        c1 = -1, c2 = TCG_COND_NE, c3 = TCG_COND_EQ;
+        break;
+    case TCG_COND_NE:
+        c1 = TCG_COND_NE, c2 = -1, c3 = TCG_COND_NE;
+        break;
+    case TCG_COND_LT:
+    case TCG_COND_LTU:
+        c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_LTU;
+        break;
+    case TCG_COND_LE:
+    case TCG_COND_LEU:
+        c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_LEU;
+        break;
+    case TCG_COND_GT:
+    case TCG_COND_GTU:
+        c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_GTU;
+        break;
+    case TCG_COND_GE:
+    case TCG_COND_GEU:
+        c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_GEU;
+        break;
+    default:
+        tcg_abort ();
+    }
+
+    tcg_out_cmp(s, ah, bh, bhconst);
+    if (c1 != -1) {
+        tcg_out_branch_i32(s, INSN_COND(tcg_cond_to_bcond[c1], 0), label_dest);
+        if (c2 != -1)
+            tcg_out_nop(s);
+    }
+    if (c2 != -1) {
+        tcg_out_branch_i32(s, INSN_COND(tcg_cond_to_bcond[c2], 0), label_next);
+    }
+    tcg_out_cmp(s, al, bl, blconst);
+    tcg_out_branch_i32(s, INSN_COND(tcg_cond_to_bcond[c3], 0), label_dest);
+    tcg_out_nop(s);
+
+    tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
+}
+#endif
+
+#if defined(__sparc_v9__) || defined(__sparc_v8plus__)
+static const uint8_t tcg_cond_to_mrcond[10] = {
+    [TCG_COND_EQ] = MRCOND_E,
+    [TCG_COND_NE] = MRCOND_NE,
+    [TCG_COND_LT] = MRCOND_L,
+    [TCG_COND_GE] = MRCOND_GE,
+    [TCG_COND_LE] = MRCOND_LE,
+    [TCG_COND_GT] = MRCOND_G
+};
+#endif
+
+static void tcg_out_movcond(TCGContext *s, int cond, TCGArg d,
+                            TCGArg c1, TCGArg c2, int c2const,
+                            TCGArg vt, int vtconst, TCGArg vf, int vfconst,
+                            int i64 __attribute__((unused)))
+{
+    TCGArg t;
+
+    if (vtconst == vfconst && vt == vf) {
+        if (vtconst)
+            tcg_out_movi_imm13(s, d, vt);
+        else
+            tcg_out_mov(s, d, vt);
+        return;
+    }
+
+#if defined(__sparc_v9__) || defined(__sparc_v8plus__)
+    /* Use MOVR when possible.  We have constrained the constants to IMM11
+       for the sake of MOVCC, but MOVR only takes IMM10.  Also, we cannot
+       overwrite the condition with our initial store to D.  Any attempt to
+       work around these extra conditions is just as much work as falling
+       back to MOVCC.  */
+    if (c2 == 0 && tcg_cond_to_mrcond[cond] && d != c1) {
+        int use_movr = 1, do_swap = 0;
+
+        if (vtconst) {
+            if (!check_fit_tl(vt, 10)) {
+                do_swap = 1;
+                if (vfconst)
+                    use_movr = check_fit_tl(vt, 10);
+                else
+                    use_movr = (d != vf);
+            }
+        } else if (d == vt) {
+            do_swap = 1;
+            if (vfconst)
+                use_movr = check_fit_tl(vt, 10);
+        }
+        if (use_movr) {
+            if (do_swap) {
+                cond = tcg_invert_cond(cond);
+                t = vt, vt = vf, vf = t;
+                t = vtconst, vtconst = vfconst, vfconst = t;
+            }
+            if (vfconst)
+                tcg_out_movi_imm13(s, d, vf);
+            else
+                tcg_out_mov(s, d, vf);
+            tcg_out32(s, ARITH_MOVR | INSN_RD(d) | INSN_RS1(c1)
+                      | INSN_MRCOND(tcg_cond_to_mrcond[cond])
+                      | (vtconst ? INSN_IMM10(vt) : INSN_RS2(vt)));
+            return;
+        }
+    }
+
+    tcg_out_cmp(s, c1, c2, c2const);
+
+    if (!vtconst && vt == d) {
+        cond = tcg_invert_cond(cond);
+        vt = vf, vf = d;
+        vtconst = vfconst, vfconst = 0;
+    }
+    if (vfconst)
+        tcg_out_movi_imm13(s, d, vf);
+    else
+        tcg_out_mov(s, d, vf);
+    tcg_out32(s, ARITH_MOVCC | INSN_RD(d)
+              | INSN_RS1(tcg_cond_to_bcond[cond])
+              | (i64 ? MOVCC_XCC : MOVCC_ICC)
+              | (vtconst ? INSN_IMM11(vt) : INSN_RS2(vt)));
+#else
+    t = gen_new_label ();
+
+    tcg_out_cmp(s, c1, c2, c2const);
+    tcg_out_branch_i32(s, INSN_COND(tcg_cond_to_bcond[cond], 1), t);
+    if (vtconst)
+        tcg_out_movi_imm13(s, d, vt);
+    else
+        tcg_out_mov(s, d, vt);
+    if (vfconst)
+        tcg_out_movi_imm13(s, d, vf);
+    else
+        tcg_out_mov(s, d, vf);
+
+    tcg_out_label(s, t, (tcg_target_long)s->code_ptr);
+#endif
+}
+
+static void tcg_out_setcond_i32(TCGContext *s, int cond, TCGArg d,
+                                TCGArg c1, TCGArg c2, int c2const)
+{
+    TCGArg t;
+
+    /* For 32-bit comparisons, we can play games with ADDX/SUBX in order
+       to get the correct value into the register.  Don't go beyond this
+       because the movcond fallback is only 4 insns.  */
+    switch (cond) {
+    case TCG_COND_EQ:
+        if (c2 != 0) {
+            if (c2const)
+                tcg_out_arithi(s, d, c1, c2, ARITH_XOR);
+            else
+                tcg_out_arith(s, d, c1, c2, ARITH_XOR);
+        }
+        c1 = d, c2 = TCG_REG_G0, c2const = 0;
+        cond = TCG_COND_LEU;
+        break;
+
+    case TCG_COND_NE:
+        if (c2 != 0) {
+            if (c2const)
+                tcg_out_arithi(s, d, c1, c2, ARITH_XOR);
+            else
+                tcg_out_arith(s, d, c1, c2, ARITH_XOR);
+        }
+        c1 = TCG_REG_G0, c2 = d, c2const = 0;
+        cond = TCG_COND_LTU;
+        break;
+
+    case TCG_COND_GTU:
+    case TCG_COND_GEU:
+        if (c2const && c2 != 0) {
+            tcg_out_movi_imm13(s, TCG_REG_I5, c2);
+            c2 = TCG_REG_I5;
+        }
+        t = c1, c1 = c2, c2 = t, c2const = 0;
+        cond = (cond == TCG_COND_GTU ? TCG_COND_LTU : TCG_COND_LEU);
+        break;
+
+    case TCG_COND_LTU:
+    case TCG_COND_LEU:
+        break;
+
+    default:
+        tcg_out_movcond(s, cond, d, c1, c2, c2const, 1, 1, 0, 1, 0);
+        return;
+    }
+
+    tcg_out_arith(s, TCG_REG_G0, c1, c2, ARITH_SUBCC);
+    if (cond == TCG_COND_LTU)
+        tcg_out_arithi(s, d, TCG_REG_G0, 0, ARITH_ADDX);
+    else
+        tcg_out_arithi(s, d, TCG_REG_G0, -1, ARITH_SUBX);
+}
+
+#if TCG_TARGET_REG_BITS == 32
+static void tcg_out_setcond2_i32(TCGContext *s, int cond, TCGArg d,
+                                 TCGArg al, TCGArg ah, TCGArg bl, int blconst,
+                                 TCGArg bh, int bhconst)
+{
+    TCGArg scratch = TCG_REG_I5;
+    int label;
+
+    if (d != al && d != ah && (blconst || d != bl) && (bhconst || d != bh))
+        scratch = d;
+
+    /* ??? For v8plus, consider reserving two global registers so that we
+       can reconstruct the 64-bit values there and compare them directly.  */
+
+    switch (cond) {
+    case TCG_COND_EQ:
+        tcg_out_setcond_i32(s, TCG_COND_EQ, scratch, al, bl, blconst);
+        tcg_out_movcond(s, TCG_COND_EQ, scratch, ah, bh, bhconst,
+                        scratch, 0, 0, 1, 0);
+        break;
+    case TCG_COND_NE:
+        tcg_out_setcond_i32(s, TCG_COND_NE, scratch, al, bl, blconst);
+        tcg_out_movcond(s, TCG_COND_NE, scratch, ah, bh, bhconst,
+                        1, 1, scratch, 0, 0);
+        break;
+
+    default:
+        label = gen_new_label();
+        tcg_out_movi_imm13(s, scratch, 1);
+        tcg_out_brcond2_i32(s, cond, al, ah, bl, blconst, bh, bhconst, label);
+        tcg_out_movi_imm13(s, scratch, 0);
+        tcg_out_label(s, label, (tcg_target_long)s->code_ptr);
+        break;
+    }
+
+    tcg_out_mov(s, d, scratch);
+}
 #endif
 
 /* Generate global QEMU prologue and epilogue code */
@@ -986,7 +1249,7 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
         tcg_out_movi(s, TCG_TYPE_I32, args[0], (uint32_t)args[1]);
         break;
 
-#if defined(__sparc_v9__) && !defined(__sparc_v8plus__)
+#if TCG_TARGET_REG_BITS == 64
 #define OP_32_64(x)                             \
         glue(glue(case INDEX_op_, x), _i32:)    \
         glue(glue(case INDEX_op_, x), _i64:)
@@ -1007,7 +1270,7 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
         tcg_out_ldst(s, args[0], args[1], args[2], LDSH);
         break;
     case INDEX_op_ld_i32:
-#if defined(__sparc_v9__) && !defined(__sparc_v8plus__)
+#if TCG_TARGET_REG_BITS == 64
     case INDEX_op_ld32u_i64:
 #endif
         tcg_out_ldst(s, args[0], args[1], args[2], LDUW);
@@ -1019,7 +1282,7 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
         tcg_out_ldst(s, args[0], args[1], args[2], STH);
         break;
     case INDEX_op_st_i32:
-#if defined(__sparc_v9__) && !defined(__sparc_v8plus__)
+#if TCG_TARGET_REG_BITS == 64
     case INDEX_op_st32_i64:
 #endif
         tcg_out_ldst(s, args[0], args[1], args[2], STW);
@@ -1074,6 +1337,26 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
         tcg_out_brcond_i32(s, args[2], args[0], args[1], const_args[1],
                            args[3]);
         break;
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond_i32(s, args[3], args[0], args[1],
+                            args[2], const_args[2]);
+        break;
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+                        const_args[2], args[3], const_args[3],
+                        args[4], const_args[4], 0);
+        break;
+#if TCG_TARGET_REG_BITS == 32
+    case INDEX_op_brcond2_i32:
+        tcg_out_brcond2_i32(s, args[4], args[0], args[1],
+                            args[2], const_args[2],
+                            args[3], const_args[3], args[5]);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2_i32(s, args[5], args[0], args[1], args[2],
+                             args[3], const_args[3], args[4], const_args[4]);
+        break;
+#endif
 
     case INDEX_op_qemu_ld8u:
         tcg_out_qemu_ld(s, args, 0);
@@ -1103,7 +1386,7 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
         tcg_out_qemu_st(s, args, 2);
         break;
 
-#if defined(__sparc_v9__) && !defined(__sparc_v8plus__)
+#if TCG_TARGET_REG_BITS == 64
     case INDEX_op_movi_i64:
         tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
         break;
@@ -1139,6 +1422,16 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
         tcg_out_brcond_i64(s, args[2], args[0], args[1], const_args[1],
                            args[3]);
         break;
+    case INDEX_op_setcond_i64:
+        tcg_out_movcond(s, args[3], args[0], args[1], args[2],
+                        const_args[2], 1, 1, 0, 1, 1);
+        break;
+    case INDEX_op_movcond_i64:
+        tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+                        const_args[2], args[3], const_args[3],
+                        args[4], const_args[4], 1);
+        break;
+
     case INDEX_op_qemu_ld64:
         tcg_out_qemu_ld(s, args, 3);
         break;
@@ -1192,7 +1485,17 @@ static const TCGTargetOpDef sparc_op_defs[] = {
     { INDEX_op_shr_i32, { "r", "r", "rJ" } },
     { INDEX_op_sar_i32, { "r", "r", "rJ" } },
 
-    { INDEX_op_brcond_i32, { "r", "ri" } },
+    { INDEX_op_brcond_i32, { "r", "rJ" } },
+    { INDEX_op_setcond_i32, { "r", "r", "rJ" } },
+#if defined(__sparc_v9__) || defined(__sparc_v8plus__)
+    { INDEX_op_movcond_i32, { "r", "r", "rJ", "rI", "rI" } },
+#else
+    { INDEX_op_movcond_i32, { "r", "r", "rJ", "rJ", "rJ" } },
+#endif
+#if TCG_TARGET_REG_BITS == 32
+    { INDEX_op_brcond2_i32, { "r", "r", "rJ", "rJ" } },
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "rJ", "rJ" } },
+#endif
 
     { INDEX_op_qemu_ld8u, { "r", "L" } },
     { INDEX_op_qemu_ld8s, { "r", "L" } },
@@ -1235,7 +1538,9 @@ static const TCGTargetOpDef sparc_op_defs[] = {
     { INDEX_op_shr_i64, { "r", "r", "rJ" } },
     { INDEX_op_sar_i64, { "r", "r", "rJ" } },
 
-    { INDEX_op_brcond_i64, { "r", "ri" } },
+    { INDEX_op_brcond_i64, { "r", "rJ" } },
+    { INDEX_op_setcond_i64, { "r", "r", "rJ" } },
+    { INDEX_op_movcond_i64, { "r", "r", "rJ", "rI", "rI" } },
 #endif
     { -1 },
 };
@@ -1243,7 +1548,7 @@ static const TCGTargetOpDef sparc_op_defs[] = {
 void tcg_target_init(TCGContext *s)
 {
     tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
-#if defined(__sparc_v9__) && !defined(__sparc_v8plus__)
+#if TCG_TARGET_REG_BITS == 64
     tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
 #endif
     tcg_regset_set32(tcg_target_call_clobber_regs, 0,
@@ -1264,7 +1569,7 @@ void tcg_target_init(TCGContext *s)
 
     tcg_regset_clear(s->reserved_regs);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0);
-#if defined(__sparc_v9__) && !defined(__sparc_v8plus__)
+#if TCG_TARGET_REG_BITS == 32
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_I4); // for internal use
 #endif
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_I5); // for internal use
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 6/7] target-i386: Use setcond and movcond.
  2009-12-17  1:19 [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes Richard Henderson
                   ` (4 preceding siblings ...)
  2009-12-16 23:26 ` [Qemu-devel] [PATCH 5/7] tcg-sparc: Implement setcond, movcond, setcond2, brcond2 Richard Henderson
@ 2009-12-16 23:28 ` Richard Henderson
  2009-12-16 23:29 ` [Qemu-devel] [PATCH 7/7] target-mips: " Richard Henderson
  2009-12-17 15:32 ` [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes malc
  7 siblings, 0 replies; 21+ messages in thread
From: Richard Henderson @ 2009-12-16 23:28 UTC (permalink / raw)
  To: qemu-devel

Splits out the condition code handling into a new function that's
directly callable from setcc and cmov expanders.  From there we can
directly emit the operation we care about.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-i386/translate.c |  192 +++++++++++++++++++++-------------------------
 tcg/tcg-op.h            |    4 +
 2 files changed, 92 insertions(+), 104 deletions(-)

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 64bc0a3..b29141b 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -888,64 +888,28 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
     }
 }
 
-/* return true if setcc_slow is not needed (WARNING: must be kept in
-   sync with gen_jcc1) */
-static int is_fast_jcc_case(DisasContext *s, int b)
+typedef struct
 {
-    int jcc_op;
-    jcc_op = (b >> 1) & 7;
-    switch(s->cc_op) {
-        /* we optimize the cmp/jcc case */
-    case CC_OP_SUBB:
-    case CC_OP_SUBW:
-    case CC_OP_SUBL:
-    case CC_OP_SUBQ:
-        if (jcc_op == JCC_O || jcc_op == JCC_P)
-            goto slow_jcc;
-        break;
-
-        /* some jumps are easy to compute */
-    case CC_OP_ADDB:
-    case CC_OP_ADDW:
-    case CC_OP_ADDL:
-    case CC_OP_ADDQ:
-
-    case CC_OP_LOGICB:
-    case CC_OP_LOGICW:
-    case CC_OP_LOGICL:
-    case CC_OP_LOGICQ:
-
-    case CC_OP_INCB:
-    case CC_OP_INCW:
-    case CC_OP_INCL:
-    case CC_OP_INCQ:
-
-    case CC_OP_DECB:
-    case CC_OP_DECW:
-    case CC_OP_DECL:
-    case CC_OP_DECQ:
-
-    case CC_OP_SHLB:
-    case CC_OP_SHLW:
-    case CC_OP_SHLL:
-    case CC_OP_SHLQ:
-        if (jcc_op != JCC_Z && jcc_op != JCC_S)
-            goto slow_jcc;
-        break;
-    default:
-    slow_jcc:
-        return 0;
-    }
-    return 1;
-}
+    TCGCond cond;
+    _Bool op1_z;
+    _Bool slow_T0;
+    TCGv op0, op1;
+} jcc2_result;
 
-/* generate a conditional jump to label 'l1' according to jump opcode
-   value 'b'. In the fast case, T0 is guaranted not to be used. */
-static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
+/* Evaluate a conditional according to jump opcode value 'b'.
+   In the fast case, T0 is guaranteed not to be used. */
+static inline jcc2_result gen_jcc2(DisasContext *s, int cc_op, int b)
 {
+    jcc2_result ret;
     int inv, jcc_op, size, cond;
     TCGv t0;
 
+    ret.cond = -1;
+    ret.op1_z = 0;
+    ret.slow_T0 = 0;
+    TCGV_UNUSED(ret.op0);
+    TCGV_UNUSED(ret.op1);
+
     inv = b & 1;
     jcc_op = (b >> 1) & 7;
 
@@ -979,31 +943,37 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
                 t0 = cpu_cc_dst;
                 break;
             }
-            tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1);
+            ret.cond = inv ? TCG_COND_NE : TCG_COND_EQ;
+            ret.op0 = t0;
+            ret.op1_z = 1;
             break;
         case JCC_S:
         fast_jcc_s:
             switch(size) {
             case 0:
                 tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80);
-                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
-                                   0, l1);
+                ret.cond = inv ? TCG_COND_EQ : TCG_COND_NE;
+                ret.op0 = cpu_tmp0;
+                ret.op1_z = 1;
                 break;
             case 1:
                 tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000);
-                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
-                                   0, l1);
+                ret.cond = inv ? TCG_COND_EQ : TCG_COND_NE;
+                ret.op0 = cpu_tmp0;
+                ret.op1_z = 1;
                 break;
 #ifdef TARGET_X86_64
             case 2:
                 tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000);
-                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
-                                   0, l1);
+                ret.cond = inv ? TCG_COND_EQ : TCG_COND_NE;
+                ret.op0 = cpu_tmp0;
+                ret.op1_z = 1;
                 break;
 #endif
             default:
-                tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst, 
-                                   0, l1);
+                ret.cond = inv ? TCG_COND_GE : TCG_COND_LT;
+                ret.op0 = cpu_cc_dst;
+                ret.op1_z = 1;
                 break;
             }
             break;
@@ -1037,7 +1007,9 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
                 t0 = cpu_cc_src;
                 break;
             }
-            tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
+            ret.cond = cond;
+            ret.op0 = cpu_tmp4;
+            ret.op1 = t0;
             break;
             
         case JCC_L:
@@ -1069,7 +1041,9 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
                 t0 = cpu_cc_src;
                 break;
             }
-            tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
+            ret.cond = cond;
+            ret.op0 = cpu_tmp4;
+            ret.op1 = t0;
             break;
             
         default:
@@ -1131,12 +1105,28 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
     default:
     slow_jcc:
         gen_setcc_slow_T0(s, jcc_op);
-        tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, 
-                           cpu_T[0], 0, l1);
-        break;
+        ret.cond = inv ? TCG_COND_EQ : TCG_COND_NE;
+        ret.op0 = cpu_T[0];
+        ret.op1_z = 1;
+        ret.slow_T0 = 1;
+        break;
     }
+
+    return ret;
 }
 
+/* Generate a conditional jump to label 'l1' according to jump opcode
+   value 'b'. In the fast case, T0 is guaranteed not to be used. */
+static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
+{
+    jcc2_result cmp = gen_jcc2(s, cc_op, b);
+    if (cmp.op1_z)
+        tcg_gen_brcondi_tl(cmp.cond, cmp.op0, 0, l1);
+    else
+        tcg_gen_brcond_tl(cmp.cond, cmp.op0, cmp.op1, l1);
+}
+
+
 /* XXX: does not work with gdbstub "ice" single step - not a
    serious problem */
 static int gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
@@ -2342,30 +2332,24 @@ static inline void gen_jcc(DisasContext *s, int b,
 
 static void gen_setcc(DisasContext *s, int b)
 {
-    int inv, jcc_op, l1;
-    TCGv t0;
+    jcc2_result cmp = gen_jcc2(s, s->cc_op, b);
 
-    if (is_fast_jcc_case(s, b)) {
-        /* nominal case: we use a jump */
-        /* XXX: make it faster by adding new instructions in TCG */
-        t0 = tcg_temp_local_new();
-        tcg_gen_movi_tl(t0, 0);
-        l1 = gen_new_label();
-        gen_jcc1(s, s->cc_op, b ^ 1, l1);
-        tcg_gen_movi_tl(t0, 1);
-        gen_set_label(l1);
-        tcg_gen_mov_tl(cpu_T[0], t0);
-        tcg_temp_free(t0);
-    } else {
-        /* slow case: it is more efficient not to generate a jump,
-           although it is questionnable whether this optimization is
-           worth to */
-        inv = b & 1;
-        jcc_op = (b >> 1) & 7;
-        gen_setcc_slow_T0(s, jcc_op);
-        if (inv) {
+    if (cmp.slow_T0) {
+        /* Slow case: Note that we've already called gen_setcc_slow_T0
+           inside gen_jcc2, which resulted in a boolean value being placed
+           into cpu_T[0].  Note also that EQ equates to inversion.  */
+        if (cmp.cond == TCG_COND_EQ) {
             tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
         }
+    } else {
+        /* Fast case: We've computed some values that need to be
+           compared directly.  */
+        TCGv op1 = cmp.op1;
+        if (cmp.op1_z)
+            op1 = tcg_const_tl(0);
+        tcg_gen_setcond_tl(cmp.cond, cpu_T[0], cmp.op0, op1);
+        if (cmp.op1_z)
+            tcg_temp_free(op1);
     }
 }
 
@@ -6335,14 +6319,14 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         break;
     case 0x140 ... 0x14f: /* cmov Gv, Ev */
         {
-            int l1;
-            TCGv t0;
+            TCGv t0, op1;
+            jcc2_result cmp;
 
             ot = dflag + OT_WORD;
             modrm = ldub_code(s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
-            t0 = tcg_temp_local_new();
+            t0 = tcg_temp_new();
             if (mod != 3) {
                 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                 gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
@@ -6350,23 +6334,23 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_mov_v_reg(ot, t0, rm);
             }
+
+            cmp = gen_jcc2(s, s->cc_op, b);
+
+            op1 = cmp.op1;
+            if (cmp.op1_z)
+                op1 = tcg_const_tl(0);
+            tcg_gen_movcond_tl(cmp.cond, cpu_regs[reg],
+                               cmp.op0, op1, t0, cpu_regs[reg]);
+            if (cmp.op1_z)
+                tcg_temp_free(op1);
+            tcg_temp_free(t0);
+
 #ifdef TARGET_X86_64
             if (ot == OT_LONG) {
-                /* XXX: specific Intel behaviour ? */
-                l1 = gen_new_label();
-                gen_jcc1(s, s->cc_op, b ^ 1, l1);
-                tcg_gen_mov_tl(cpu_regs[reg], t0);
-                gen_set_label(l1);
                 tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]);
-            } else
-#endif
-            {
-                l1 = gen_new_label();
-                gen_jcc1(s, s->cc_op, b ^ 1, l1);
-                gen_op_mov_reg_v(ot, reg, t0);
-                gen_set_label(l1);
             }
-            tcg_temp_free(t0);
+#endif
         }
         break;
 
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 4d0fec0..4db44b6 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -2154,6 +2154,8 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_gen_sari_tl tcg_gen_sari_i64
 #define tcg_gen_brcond_tl tcg_gen_brcond_i64
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i64
+#define tcg_gen_setcond_tl tcg_gen_setcond_i64
+#define tcg_gen_movcond_tl tcg_gen_movcond_i64
 #define tcg_gen_mul_tl tcg_gen_mul_i64
 #define tcg_gen_muli_tl tcg_gen_muli_i64
 #define tcg_gen_div_tl tcg_gen_div_i64
@@ -2224,6 +2226,8 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_gen_sari_tl tcg_gen_sari_i32
 #define tcg_gen_brcond_tl tcg_gen_brcond_i32
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i32
+#define tcg_gen_setcond_tl tcg_gen_setcond_i32
+#define tcg_gen_movcond_tl tcg_gen_movcond_i32
 #define tcg_gen_mul_tl tcg_gen_mul_i32
 #define tcg_gen_muli_tl tcg_gen_muli_i32
 #define tcg_gen_div_tl tcg_gen_div_i32
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 7/7] target-mips: Use setcond and movcond.
  2009-12-17  1:19 [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes Richard Henderson
                   ` (5 preceding siblings ...)
  2009-12-16 23:28 ` [Qemu-devel] [PATCH 6/7] target-i386: Use setcond and movcond Richard Henderson
@ 2009-12-16 23:29 ` Richard Henderson
  2009-12-17 15:32 ` [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes malc
  7 siblings, 0 replies; 21+ messages in thread
From: Richard Henderson @ 2009-12-16 23:29 UTC (permalink / raw)
  To: qemu-devel

Uses setcond in the many branch condition generators and movcond
in the conditional move expanders.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-mips/translate.c |  124 +++++++++++++++++++++++++----------------------
 1 files changed, 66 insertions(+), 58 deletions(-)

diff --git a/target-mips/translate.c b/target-mips/translate.c
index dfea6f6..3c1f630 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -695,18 +695,10 @@ FOP_CONDS(abs, ps, 64)
 #undef FOP_CONDS
 
 /* Tests */
-#define OP_COND(name, cond)                                         \
-static inline void glue(gen_op_, name) (TCGv ret, TCGv t0, TCGv t1) \
-{                                                                   \
-    int l1 = gen_new_label();                                       \
-    int l2 = gen_new_label();                                       \
-                                                                    \
-    tcg_gen_brcond_tl(cond, t0, t1, l1);                            \
-    tcg_gen_movi_tl(ret, 0);                                        \
-    tcg_gen_br(l2);                                                 \
-    gen_set_label(l1);                                              \
-    tcg_gen_movi_tl(ret, 1);                                        \
-    gen_set_label(l2);                                              \
+#define OP_COND(name, cond)                                             \
+static inline void glue(gen_op_, name) (TCGv ret, TCGv t0, TCGv t1)     \
+{                                                                       \
+    tcg_gen_setcond_tl(cond, ret, t0, t1);                              \
 }
 OP_COND(eq, TCG_COND_EQ);
 OP_COND(ne, TCG_COND_NE);
@@ -716,35 +708,23 @@ OP_COND(lt, TCG_COND_LT);
 OP_COND(ltu, TCG_COND_LTU);
 #undef OP_COND
 
-#define OP_CONDI(name, cond)                                                 \
+#define OP_CONDI(name, cond)                                            \
 static inline void glue(gen_op_, name) (TCGv ret, TCGv t0, target_ulong val) \
-{                                                                            \
-    int l1 = gen_new_label();                                                \
-    int l2 = gen_new_label();                                                \
-                                                                             \
-    tcg_gen_brcondi_tl(cond, t0, val, l1);                                   \
-    tcg_gen_movi_tl(ret, 0);                                                 \
-    tcg_gen_br(l2);                                                          \
-    gen_set_label(l1);                                                       \
-    tcg_gen_movi_tl(ret, 1);                                                 \
-    gen_set_label(l2);                                                       \
+{                                                                       \
+    TCGv t1 = tcg_const_tl(val);                                        \
+    tcg_gen_setcond_tl(cond, ret, t0, t1);                              \
+    tcg_temp_free(t1);                                                  \
 }
 OP_CONDI(lti, TCG_COND_LT);
 OP_CONDI(ltiu, TCG_COND_LTU);
 #undef OP_CONDI
 
-#define OP_CONDZ(name, cond)                                  \
-static inline void glue(gen_op_, name) (TCGv ret, TCGv t0)    \
-{                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcondi_tl(cond, t0, 0, l1);                      \
-    tcg_gen_movi_tl(ret, 0);                                  \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(ret, 1);                                  \
-    gen_set_label(l2);                                        \
+#define OP_CONDZ(name, cond)                                    \
+static inline void glue(gen_op_, name) (TCGv ret, TCGv t0)      \
+{                                                               \
+    TCGv zero = tcg_const_tl(0);                                \
+    tcg_gen_setcond_tl(cond, ret, t0, zero);                    \
+    tcg_temp_free(zero);                                        \
 }
 OP_CONDZ(gez, TCG_COND_GE);
 OP_CONDZ(gtz, TCG_COND_GT);
@@ -1705,36 +1685,45 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
 static void gen_cond_move (CPUState *env, uint32_t opc, int rd, int rs, int rt)
 {
     const char *opn = "cond move";
-    int l1;
+    TCGv zero, vs;
+    TCGCond cond;
 
     if (rd == 0) {
-        /* If no destination, treat it as a NOP.
-           For add & sub, we must generate the overflow exception when needed. */
+        /* If no destination, treat it as a NOP.  For add & sub, we
+           must generate the overflow exception when needed. */
         MIPS_DEBUG("NOP");
         return;
     }
 
-    l1 = gen_new_label();
+    zero = tcg_const_tl(0);
+    if (rs == 0)
+        vs = zero;
+    else
+        vs = cpu_gpr[rs];
+
     switch (opc) {
     case OPC_MOVN:
-        if (likely(rt != 0))
-            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rt], 0, l1);
-        else
-            tcg_gen_br(l1);
         opn = "movn";
+        cond = TCG_COND_EQ;
+        if (unlikely(rt == 0))
+            goto done;
         break;
     case OPC_MOVZ:
-        if (likely(rt != 0))
-            tcg_gen_brcondi_tl(TCG_COND_NE, cpu_gpr[rt], 0, l1);
         opn = "movz";
+        cond = TCG_COND_NE;
+        if (unlikely(rt == 0)) {
+            tcg_gen_mov_tl(cpu_gpr[rd], vs);
+            goto done;
+        }
         break;
+    default:
+        abort ();
     }
-    if (rs != 0)
-        tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rs]);
-    else
-        tcg_gen_movi_tl(cpu_gpr[rd], 0);
-    gen_set_label(l1);
 
+    tcg_gen_movcond_tl(cond, cpu_gpr[rd], cpu_gpr[rt], zero, vs, cpu_gpr[rd]);
+
+ done:
+    tcg_temp_free(zero);
     MIPS_DEBUG("%s %s, %s, %s", opn, regnames[rd], regnames[rs], regnames[rt]);
 }
 
@@ -5845,7 +5834,6 @@ static void gen_cp1 (DisasContext *ctx, uint32_t opc, int rt, int fs)
 
 static void gen_movci (DisasContext *ctx, int rd, int rs, int cc, int tf)
 {
-    int l1;
     TCGCond cond;
     TCGv_i32 t0;
 
@@ -5859,17 +5847,37 @@ static void gen_movci (DisasContext *ctx, int rd, int rs, int cc, int tf)
     else
         cond = TCG_COND_NE;
 
-    l1 = gen_new_label();
     t0 = tcg_temp_new_i32();
     tcg_gen_andi_i32(t0, fpu_fcr31, 1 << get_fp_bit(cc));
-    tcg_gen_brcondi_i32(cond, t0, 0, l1);
-    tcg_temp_free_i32(t0);
-    if (rs == 0) {
-        tcg_gen_movi_tl(cpu_gpr[rd], 0);
+
+    /* ??? There is no movcond with 32-bit comparison and 64-bit data.  */
+    if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
+        TCGv t1, zero, vs;
+
+#if TARGET_LONG_BITS == 32
+        t1 = t0;
+#else
+        t1 = tcg_temp_new();
+        tcg_gen_ext_i32_i64(t1, t0);
+#endif
+        zero = tcg_const_tl(0);
+        vs = (rs == 0 ? zero : cpu_gpr[rs]);
+        tcg_gen_movcond_tl(cond, cpu_gpr[rd], t1, zero, cpu_gpr[rd], vs);
+        tcg_temp_free(zero);
+#if TARGET_LONG_BITS == 64
+        tcg_temp_free(t1);
+#endif
+        tcg_temp_free_i32(t0);
     } else {
-        tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rs]);
+        int l1 = gen_new_label();
+        tcg_gen_brcondi_i32(cond, t0, 0, l1);
+        if (rs == 0) {
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);
+        } else {
+            tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rs]);
+        }
+        gen_set_label(l1);
     }
-    gen_set_label(l1);
 }
 
 static inline void gen_movcf_s (int fs, int fd, int cc, int tf)
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes
@ 2009-12-17  1:19 Richard Henderson
  2009-12-16  0:34 ` [Qemu-devel] [PATCH 1/7] tcg: Generic support for conditional set and conditional move Richard Henderson
                   ` (7 more replies)
  0 siblings, 8 replies; 21+ messages in thread
From: Richard Henderson @ 2009-12-17  1:19 UTC (permalink / raw)
  To: qemu-devel

This patch series adds support for setcond (aka setcc) and
movcond (aka cmov) opcodes to TCG.

These new opcodes are considered "required" by the backend,
because expanding them at the tcg level breaks the basic block.
There might be some way to emulate within tcg internals, but
that doesn't seem worthwhile, as essentially all hosts have
some form of support for these.

I've implemented support for the new opcodes within the x86_64,
i386, and sparc tcg backends.  The latter is untested due to
lack of hardware and failure of the build system to cross-compile.
However, it should be a decent starting point for whoever can.

I've implemented support for the new opcodes within the alpha,
i386 and mips translators.  The translations work, as far as I
can tell from linux-user-test.

Comments?


r~


Richard Henderson (7):
  tcg: Generic support for conditional set and conditional move.
  tcg-amd64: Implement setcond and movcond.
  target-alpha: Use setcond/movcond in integer compares and cmoves.
  tcg-i386: Implement setcond, movcond, setcond2.
  tcg-sparc: Implement setcond, movcond, setcond2, brcond2.
  target-i386: Use setcond and movcond.
  target-mips: Use setcond and movcond.

 elf.h                    |    2 +
 target-alpha/translate.c |   66 ++++----
 target-i386/translate.c  |  192 ++++++++++------------
 target-mips/translate.c  |  124 ++++++++-------
 tcg/README               |   16 ++-
 tcg/i386/tcg-target.c    |  280 ++++++++++++++++++++++++++------
 tcg/sparc/tcg-target.c   |  415 ++++++++++++++++++++++++++++++++++++++++------
 tcg/tcg-op.h             |   91 ++++++++++
 tcg/tcg-opc.h            |    5 +
 tcg/tcg.c                |   23 ++-
 tcg/tcg.h                |    5 +
 tcg/x86_64/tcg-target.c  |   65 +++++++-
 12 files changed, 973 insertions(+), 311 deletions(-)

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes
  2009-12-17  1:19 [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes Richard Henderson
                   ` (6 preceding siblings ...)
  2009-12-16 23:29 ` [Qemu-devel] [PATCH 7/7] target-mips: " Richard Henderson
@ 2009-12-17 15:32 ` malc
  2009-12-17 15:37   ` Laurent Desnogues
  2009-12-17 17:07   ` Richard Henderson
  7 siblings, 2 replies; 21+ messages in thread
From: malc @ 2009-12-17 15:32 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Wed, 16 Dec 2009, Richard Henderson wrote:

> This patch series adds support for setcond (aka setcc) and
> movcond (aka cmov) opcodes to TCG.
> 
> These new opcodes are considered "required" by the backend,
> because expanding them at the tcg level breaks the basic block.
> There might be some way to emulate within tcg internals, but
> that doesn't seem worthwhile, as essentially all hosts have
> some form of support for these.
> 
> I've implemented support for the new opcodes within the x86_64,
> i386, and sparc tcg backends.  The later is untested due to 
> lack of hardware and failure of the build system to cross-compile.
> However, it should be a decent starting point for whoever can.
> 
> I've implementing support for the new opcodes within the alpha,
> i386 and mips translators.  The translations work, as far as I
> can tell from linux-user-test.
> 
> Comments?

Some:
 a. It breaks tcg on PPC[1]:

    ...qemu/tcg/tcg.c:1378: tcg fatal error

 b. Documentation for movcond has a typo, t0 is assigned not t1
 
 c. Historically things like that were made conditional with
    a generic fallback (bswap, neg, not, rot, etc)

 d. Documentation for setcond2 is missing

 e. There's some indentation weirdness here and there and `git am'
    complains about added trailing whitespace

It would also be interesting to learn what impact adding those two
has on performance, any results?

[..snip..]

[1] With following i can run some i386 user tests on PPC32 (ls,
    openssl)

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 07e6941..195af13 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -316,6 +316,7 @@ static int tcg_target_const_match(tcg_target_long val,
 #define STH    OPCD(44)
 #define STW    OPCD(36)
 
+#define ADDIC  OPCD(12)
 #define ADDI   OPCD(14)
 #define ADDIS  OPCD(15)
 #define ORI    OPCD(24)
@@ -339,6 +340,7 @@ static int tcg_target_const_match(tcg_target_long val,
 #define CRANDC XO19(129)
 #define CRNAND XO19(225)
 #define CROR   XO19(449)
+#define CRNOR  XO19( 33)
 
 #define EXTSB  XO31(954)
 #define EXTSH  XO31(922)
@@ -365,6 +367,8 @@ static int tcg_target_const_match(tcg_target_long val,
 #define MTSPR  XO31(467)
 #define SRAWI  XO31(824)
 #define NEG    XO31(104)
+#define MFCR   XO31( 19)
+#define CNTLZW XO31( 26)
 
 #define LBZX   XO31( 87)
 #define LHZX   XO31(279)
@@ -1073,6 +1077,95 @@ static void tcg_out_brcond (TCGContext *s, int cond,
     tcg_out_bc (s, tcg_to_bc[cond], label_index);
 }
 
+static void tcg_out_setcond (TCGContext *s, int cond, TCGArg arg0,
+                             TCGArg arg1, TCGArg arg2, int const_arg2)
+{
+    int crop, sh;
+
+    switch (cond) {
+    case TCG_COND_EQ:
+        if (const_arg2) {
+            if ((uint16_t) arg2 == arg2) {
+                tcg_out32 (s, XORI | RS (arg1) | RA (0) | arg2);
+            }
+            else {
+                tcg_out_movi (s, TCG_TYPE_I32, 0, arg2);
+                tcg_out32 (s, XOR | SAB (arg1, 0, 0));
+            }
+        }
+        else {
+            tcg_out32 (s, XOR | SAB (arg1, 0, arg2));
+        }
+        tcg_out32 (s, CNTLZW | RS (0) | RA (0));
+        tcg_out32 (s, (RLWINM
+                       | RA (arg0)
+                       | RS (0)
+                       | SH (27)
+                       | MB (5)
+                       | ME (31)
+                       )
+            );
+        return;
+
+    case TCG_COND_NE:
+        if (const_arg2) {
+            if ((uint16_t) arg2 == arg2) {
+                tcg_out32 (s, XORI | RS (arg1) | RA (0) | arg2);
+            }
+            else {
+                tcg_out_movi (s, TCG_TYPE_I32, 0, arg2);
+                tcg_out32 (s, XOR | SAB (arg1, 0, 0));
+            }
+        }
+        else {
+            tcg_out32 (s, XOR | SAB (arg1, 0, arg2));
+        }
+
+        tcg_out32 (s, ADDIC | RT (arg0) | RA (0) | 0xffff);
+        tcg_out32 (s, SUBFE | TAB (arg0, arg0, 0));
+        return;
+
+    case TCG_COND_LTU:
+    case TCG_COND_LT:
+        sh = 29;
+        crop = 0;
+        break;
+
+    case TCG_COND_GEU:
+    case TCG_COND_GE:
+        sh = 31;
+        crop = CRNOR | BT (7, CR_EQ) | BA (7, CR_LT) | BB (7, CR_LT);
+        break;
+
+    case TCG_COND_LEU:
+    case TCG_COND_LE:
+        sh = 31;
+        crop = CRNOR | BT (7, CR_EQ) | BA (7, CR_GT) | BB (7, CR_GT);
+        break;
+
+    case TCG_COND_GTU:
+    case TCG_COND_GT:
+        sh = 30;
+        crop = 0;
+        break;
+
+    default:
+        tcg_abort ();
+    }
+
+    tcg_out_cmp (s, cond, arg1, arg2, const_arg2, 7);
+    tcg_out32 (s, MFCR | RT (0));
+    if (crop) tcg_out32 (s, crop);
+    tcg_out32 (s, (RLWINM
+                   | RA (arg0)
+                   | RS (0)
+                   | SH (sh)
+                   | MB (31)
+                   | ME (31)
+                   )
+            );
+}
+
 /* XXX: we implement it at the target level to avoid having to
    handle cross basic blocks temporaries */
 static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
@@ -1496,6 +1589,10 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
         tcg_out32 (s, EXTSH | RS (args[1]) | RA (args[0]));
         break;
 
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]);
+        break;
+
     default:
         tcg_dump_ops (s, stderr);
         tcg_abort ();
@@ -1544,6 +1641,8 @@ static const TCGTargetOpDef ppc_op_defs[] = {
 
     { INDEX_op_neg_i32, { "r", "r" } },
 
+    { INDEX_op_setcond_i32, { "r", "r", "ri" } },
+
 #if TARGET_LONG_BITS == 32
     { INDEX_op_qemu_ld8u, { "r", "L" } },
     { INDEX_op_qemu_ld8s, { "r", "L" } },

-- 
mailto:av1474@comtv.ru

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes
  2009-12-17 15:32 ` [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes malc
@ 2009-12-17 15:37   ` Laurent Desnogues
  2009-12-17 17:07   ` Richard Henderson
  1 sibling, 0 replies; 21+ messages in thread
From: Laurent Desnogues @ 2009-12-17 15:37 UTC (permalink / raw)
  To: malc; +Cc: qemu-devel, Richard Henderson

On Thu, Dec 17, 2009 at 4:32 PM, malc <av1474@comtv.ru> wrote:
[...]
>
> Some:
>  a. It breaks tcg on PPC[1]:
>
>    ...qemu/tcg/tcg.c:1378: tcg fatal error

What a surprise :-)

I can provide a similar patch for ARM (I already have one
for my own implementation of setcond), but I'll wait for this
patch series to stabilize first.


Laurent

>  b. Documentation for movcond has a typo, t0 is assigned not t1
>
>  c. Historically things like that were made conditional with
>    a generic fallback (bswap, neg, not, rot, etc)
>
>  d. Documentation for setcond2 is missing
>
>  e. There's some indentation weirdness here and there and `git am'
>    complains about added trailing whitespace
>
> It would also be interesting to learn what impact adding those two
> has on performance, any results?
>
> [..snip..]
>
> [1] With following i can run some i386 user tests on PPC32 (ls,
>    openssl)
>
> diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
> index 07e6941..195af13 100644
> --- a/tcg/ppc/tcg-target.c
> +++ b/tcg/ppc/tcg-target.c
> @@ -316,6 +316,7 @@ static int tcg_target_const_match(tcg_target_long val,
>  #define STH    OPCD(44)
>  #define STW    OPCD(36)
>
> +#define ADDIC  OPCD(12)
>  #define ADDI   OPCD(14)
>  #define ADDIS  OPCD(15)
>  #define ORI    OPCD(24)
> @@ -339,6 +340,7 @@ static int tcg_target_const_match(tcg_target_long val,
>  #define CRANDC XO19(129)
>  #define CRNAND XO19(225)
>  #define CROR   XO19(449)
> +#define CRNOR  XO19( 33)
>
>  #define EXTSB  XO31(954)
>  #define EXTSH  XO31(922)
> @@ -365,6 +367,8 @@ static int tcg_target_const_match(tcg_target_long val,
>  #define MTSPR  XO31(467)
>  #define SRAWI  XO31(824)
>  #define NEG    XO31(104)
> +#define MFCR   XO31( 19)
> +#define CNTLZW XO31( 26)
>
>  #define LBZX   XO31( 87)
>  #define LHZX   XO31(279)
> @@ -1073,6 +1077,95 @@ static void tcg_out_brcond (TCGContext *s, int cond,
>     tcg_out_bc (s, tcg_to_bc[cond], label_index);
>  }
>
> +static void tcg_out_setcond (TCGContext *s, int cond, TCGArg arg0,
> +                             TCGArg arg1, TCGArg arg2, int const_arg2)
> +{
> +    int crop, sh;
> +
> +    switch (cond) {
> +    case TCG_COND_EQ:
> +        if (const_arg2) {
> +            if ((uint16_t) arg2 == arg2) {
> +                tcg_out32 (s, XORI | RS (arg1) | RA (0) | arg2);
> +            }
> +            else {
> +                tcg_out_movi (s, TCG_TYPE_I32, 0, arg2);
> +                tcg_out32 (s, XOR | SAB (arg1, 0, 0));
> +            }
> +        }
> +        else {
> +            tcg_out32 (s, XOR | SAB (arg1, 0, arg2));
> +        }
> +        tcg_out32 (s, CNTLZW | RS (0) | RA (0));
> +        tcg_out32 (s, (RLWINM
> +                       | RA (arg0)
> +                       | RS (0)
> +                       | SH (27)
> +                       | MB (5)
> +                       | ME (31)
> +                       )
> +            );
> +        return;
> +
> +    case TCG_COND_NE:
> +        if (const_arg2) {
> +            if ((uint16_t) arg2 == arg2) {
> +                tcg_out32 (s, XORI | RS (arg1) | RA (0) | arg2);
> +            }
> +            else {
> +                tcg_out_movi (s, TCG_TYPE_I32, 0, arg2);
> +                tcg_out32 (s, XOR | SAB (arg1, 0, 0));
> +            }
> +        }
> +        else {
> +            tcg_out32 (s, XOR | SAB (arg1, 0, arg2));
> +        }
> +
> +        tcg_out32 (s, ADDIC | RT (arg0) | RA (0) | 0xffff);
> +        tcg_out32 (s, SUBFE | TAB (arg0, arg0, 0));
> +        return;
> +
> +    case TCG_COND_LTU:
> +    case TCG_COND_LT:
> +        sh = 29;
> +        crop = 0;
> +        break;
> +
> +    case TCG_COND_GEU:
> +    case TCG_COND_GE:
> +        sh = 31;
> +        crop = CRNOR | BT (7, CR_EQ) | BA (7, CR_LT) | BB (7, CR_LT);
> +        break;
> +
> +    case TCG_COND_LEU:
> +    case TCG_COND_LE:
> +        sh = 31;
> +        crop = CRNOR | BT (7, CR_EQ) | BA (7, CR_GT) | BB (7, CR_GT);
> +        break;
> +
> +    case TCG_COND_GTU:
> +    case TCG_COND_GT:
> +        sh = 30;
> +        crop = 0;
> +        break;
> +
> +    default:
> +        tcg_abort ();
> +    }
> +
> +    tcg_out_cmp (s, cond, arg1, arg2, const_arg2, 7);
> +    tcg_out32 (s, MFCR | RT (0));
> +    if (crop) tcg_out32 (s, crop);
> +    tcg_out32 (s, (RLWINM
> +                   | RA (arg0)
> +                   | RS (0)
> +                   | SH (sh)
> +                   | MB (31)
> +                   | ME (31)
> +                   )
> +            );
> +}
> +
>  /* XXX: we implement it at the target level to avoid having to
>    handle cross basic blocks temporaries */
>  static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
> @@ -1496,6 +1589,10 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
>         tcg_out32 (s, EXTSH | RS (args[1]) | RA (args[0]));
>         break;
>
> +    case INDEX_op_setcond_i32:
> +        tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]);
> +        break;
> +
>     default:
>         tcg_dump_ops (s, stderr);
>         tcg_abort ();
> @@ -1544,6 +1641,8 @@ static const TCGTargetOpDef ppc_op_defs[] = {
>
>     { INDEX_op_neg_i32, { "r", "r" } },
>
> +    { INDEX_op_setcond_i32, { "r", "r", "ri" } },
> +
>  #if TARGET_LONG_BITS == 32
>     { INDEX_op_qemu_ld8u, { "r", "L" } },
>     { INDEX_op_qemu_ld8s, { "r", "L" } },
>
> --
> mailto:av1474@comtv.ru
>
>
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes
  2009-12-17 15:32 ` [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes malc
  2009-12-17 15:37   ` Laurent Desnogues
@ 2009-12-17 17:07   ` Richard Henderson
  2009-12-17 17:47     ` malc
                       ` (2 more replies)
  1 sibling, 3 replies; 21+ messages in thread
From: Richard Henderson @ 2009-12-17 17:07 UTC (permalink / raw)
  To: malc; +Cc: qemu-devel

On 12/17/2009 07:32 AM, malc wrote:
>> These new opcodes are considered "required" by the backend,
>> because expanding them at the tcg level breaks the basic block.
>> There might be some way to emulate within tcg internals, but
>> that doesn't seem worthwhile, as essentially all hosts have
>> some form of support for these.
...
 >   c. Historically things like that were made conditional with
 >      a generic fallback (bswap, neg, not, rot, etc)

I answered this one above.  A generic fallback would break the
basic block, which would break TCGs simple register allocation.

>   b. Documentation for movcond has a typo, t0 is assigned not t1

Oops.  Will fix.

>   d. Documentation for setcond2 is missing

Ah, I see that brcond2 is missing as well; I'll fix that too.

> It would also be interesting to learn what impact adding those two
> has on performance, any results?

Hmph, not as much as I would have liked.  I suppose Intel is getting 
pretty darned good with its branch prediction.  It shaved about 3 
minutes off 183.equake from what I posted earlier this week; that's 
something around a 7% improvement, assuming it's not just all noise (I 
haven't run that test enough times to see what the variation is).

> +    case TCG_COND_NE:
> +        if (const_arg2) {
> +            if ((uint16_t) arg2 == arg2) {
> +                tcg_out32 (s, XORI | RS (arg1) | RA (0) | arg2);
> +            }
> +            else {
> +                tcg_out_movi (s, TCG_TYPE_I32, 0, arg2);
> +                tcg_out32 (s, XOR | SAB (arg1, 0, 0));
> +            }
> +        }
> +        else {
> +            tcg_out32 (s, XOR | SAB (arg1, 0, arg2));
> +        }
> +
> +        tcg_out32 (s, ADDIC | RT (arg0) | RA (0) | 0xffff);
> +        tcg_out32 (s, SUBFE | TAB (arg0, arg0, 0));
> +        return;

Heh, you know a trick that gcc doesn't for powerpc.  It just adds an xor 
at the end of the EQ sequence.


r~

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes
  2009-12-17 17:07   ` Richard Henderson
@ 2009-12-17 17:47     ` malc
  2009-12-17 18:09       ` Richard Henderson
  2009-12-17 17:48     ` Richard Henderson
  2009-12-18 15:40     ` malc
  2 siblings, 1 reply; 21+ messages in thread
From: malc @ 2009-12-17 17:47 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Thu, 17 Dec 2009, Richard Henderson wrote:

> On 12/17/2009 07:32 AM, malc wrote:
> > > These new opcodes are considered "required" by the backend,
> > > because expanding them at the tcg level breaks the basic block.
> > > There might be some way to emulate within tcg internals, but
> > > that doesn't seem worthwhile, as essentially all hosts have
> > > some form of support for these.
> ..
> >   c. Historically things like that were made conditional with
> >      a generic fallback (bswap, neg, not, rot, etc)
> 
> I answered this one above.  A generic fallback would break the
> basic block, which would break TCGs simple register allocation.

Urgh.. I really hate implementing those xxxx2 ops.

See for example this lovely thread:
http://www.archivum.info/qemu-devel@nongnu.org/2008-06/00306/%5BQemu-devel%5D_%5B4705%5D_Fix_div%5Bu%5D2.

> 
> >   b. Documentation for movcond has a typo, t0 is assigned not t1
> 
> Oops.  Will fix.
> 
> >   d. Documentation for setcond2 is missing
> 
> Ah, I see that brcond2 is missing as well; I'll fix that too.
> 
> > It would also be interesting to learn what impact adding those two
> > has on performance, any results?
> 
> Hmph, not as much as I would have liked.  I suppose Intel is getting pretty
> darned good with its branch prediction.  It shaved about 3 minutes off
> 183.equake from what I posted earlier this week; that's something around a 7%
> improvement, assuming it's not just all noise (I havn't run that test enough
> times to see what the variation is).

If 3 minutes(!!) is only 7% then this test is a monster.

> 
> > +    case TCG_COND_NE:
> > +        if (const_arg2) {
> > +            if ((uint16_t) arg2 == arg2) {
> > +                tcg_out32 (s, XORI | RS (arg1) | RA (0) | arg2);
> > +            }
> > +            else {
> > +                tcg_out_movi (s, TCG_TYPE_I32, 0, arg2);
> > +                tcg_out32 (s, XOR | SAB (arg1, 0, 0));
> > +            }
> > +        }
> > +        else {
> > +            tcg_out32 (s, XOR | SAB (arg1, 0, arg2));
> > +        }
> > +
> > +        tcg_out32 (s, ADDIC | RT (arg0) | RA (0) | 0xffff);
> > +        tcg_out32 (s, SUBFE | TAB (arg0, arg0, 0));
> > +        return;
> 
> Heh, you know a trick that gcc doesn't for powerpc.  It just adds an xor at
> the end of the EQ sequence.

Well, truth be told, i just looked at what gcc 4.4.1 produces for:

    return op1 != op2 ? 1 : 0;
               ==

And did the same. FWIW gcc's handling of LT,LE,GT,GE is not as naive as
this implementation (it avoids CR ops/moves when op2 is immediate), but
i'm not sure the gain is worth the pain, so i left it as is for
simplicity's sake.

P.S. BTW PPC has the same dilemma w.r.t. conditional moves: on x86 it's
     cmov that is not universally available, for PPC it's isel. Given
     that there's also some fruit in going with LDBRX where available,
     i guess it's worthwhile investigating the proper interface for
     querying the host CPU features...

-- 
mailto:av1474@comtv.ru

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes
  2009-12-17 17:07   ` Richard Henderson
  2009-12-17 17:47     ` malc
@ 2009-12-17 17:48     ` Richard Henderson
  2009-12-18 15:40     ` malc
  2 siblings, 0 replies; 21+ messages in thread
From: Richard Henderson @ 2009-12-17 17:48 UTC (permalink / raw)
  To: malc; +Cc: qemu-devel

On 12/17/2009 09:07 AM, Richard Henderson wrote:
>> + case TCG_COND_NE:
>> + if (const_arg2) {
>> + if ((uint16_t) arg2 == arg2) {
>> + tcg_out32 (s, XORI | RS (arg1) | RA (0) | arg2);
>> + }
>> + else {
>> + tcg_out_movi (s, TCG_TYPE_I32, 0, arg2);
>> + tcg_out32 (s, XOR | SAB (arg1, 0, 0));
>> + }
>> + }
>> + else {
>> + tcg_out32 (s, XOR | SAB (arg1, 0, arg2));
>> + }
>> +
>> + tcg_out32 (s, ADDIC | RT (arg0) | RA (0) | 0xffff);
>> + tcg_out32 (s, SUBFE | TAB (arg0, arg0, 0));
>> + return;
>
> Heh, you know a trick that gcc doesn't for powerpc. It just adds an xor
> at the end of the EQ sequence.

Ah, gcc does know the trick, it just thinks it's only a win on old Power 
systems.


r~

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes
  2009-12-17 17:47     ` malc
@ 2009-12-17 18:09       ` Richard Henderson
  0 siblings, 0 replies; 21+ messages in thread
From: Richard Henderson @ 2009-12-17 18:09 UTC (permalink / raw)
  To: malc; +Cc: qemu-devel

On 12/17/2009 09:47 AM, malc wrote:
> Urgh.. I really hate implementing those xxxx2 ops.
>
> See for example this lovely thread:
> http://www.archivum.info/qemu-devel@nongnu.org/2008-06/00306/%5BQemu-devel%5D_%5B4705%5D_Fix_div%5Bu%5D2.

Heh, that one's pretty nasty.

But fwiw, you can just call into your brcond2 implementation for 
setcond2.  There's a bit more work to do on sparc and mips because 
they'd prefer we filled the delay slots.


r~

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes
  2009-12-17 17:07   ` Richard Henderson
  2009-12-17 17:47     ` malc
  2009-12-17 17:48     ` Richard Henderson
@ 2009-12-18 15:40     ` malc
  2009-12-18 16:05       ` Richard Henderson
  2 siblings, 1 reply; 21+ messages in thread
From: malc @ 2009-12-18 15:40 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Thu, 17 Dec 2009, Richard Henderson wrote:

> On 12/17/2009 07:32 AM, malc wrote:
> > > These new opcodes are considered "required" by the backend,
> > > because expanding them at the tcg level breaks the basic block.
> > > There might be some way to emulate within tcg internals, but
> > > that doesn't seem worthwhile, as essentially all hosts have
> > > some form of support for these.
> ..
> >   c. Historically things like that were made conditional with
> >      a generic fallback (bswap, neg, not, rot, etc)
> 
> I answered this one above.  A generic fallback would break the
> basic block, which would break TCGs simple register allocation.
> 
> >   b. Documentation for movcond has a typo, t0 is assigned not t1
> 
> Oops.  Will fix.
> 
> >   d. Documentation for setcond2 is missing
> 
> Ah, I see that brcond2 is missing as well; I'll fix that too.
> 
> > It would also be interesting to learn what impact adding those two
> > has on performance, any results?
> 
> Hmph, not as much as I would have liked.  I suppose Intel is getting pretty
> darned good with its branch prediction.  It shaved about 3 minutes off
> 183.equake from what I posted earlier this week; that's something around a 7%
> improvement, assuming it's not just all noise (I haven't run that test enough
> times to see what the variation is).
> 

After fixing a bug (crop was done after reading the cr) I ran some
openssl speed benchmarks, and, at least here on an MPC7447A, got a
speed degradation, tiny but consistent. Took a very quick glance at
the generated code and the first thing i saw was this:

----------------
IN: 
0x40082295:  movzbl (%eax),%eax
0x40082298:  cmp    $0x3d,%al
0x4008229a:  setne  %dl
0x4008229d:  test   %al,%al
0x4008229f:  je     0x400822d2

OP after liveness analysis:
 mov_i32 tmp2,eax
 qemu_ld8u tmp0,tmp2,$0xffffffff
 mov_i32 eax,tmp0
 movi_i32 tmp1,$0x3d
 mov_i32 tmp0,eax
 nopn $0x2,$0x2
 sub_i32 cc_dst,tmp0,tmp1
 movi_i32 tmp13,$0xff
 and_i32 tmp4,cc_dst,tmp13
 movi_i32 tmp13,$0x0
 setcond_i32 tmp0,tmp4,tmp13,ne
 movi_i32 tmp14,$0xff
 and_i32 tmp13,tmp0,tmp14

....

OUT: [size=204]
0x601051b0:  lwz     r14,0(r27)
0x601051b4:  lbzx    r14,0,r14
0x601051b8:  mr      r15,r14
0x601051bc:  addi    r15,r15,-61
0x601051c0:  andi.   r15,r15,255
0x601051c4:  cmpwi   cr6,r15,0
0x601051c8:  crnot   4*cr7+eq,4*cr6+eq
0x601051cc:  mfcr    r0
0x601051d0:  rlwinm  r15,r0,31,31,31
0x601051d4:  andi.   r15,r15,255

...

So the fact that setcond produces 0/1 was never communicated to the
tcg, not that I would claim that it's possible at all...

[..snip..]

-- 
mailto:av1474@comtv.ru

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes
  2009-12-18 15:40     ` malc
@ 2009-12-18 16:05       ` Richard Henderson
  0 siblings, 0 replies; 21+ messages in thread
From: Richard Henderson @ 2009-12-18 16:05 UTC (permalink / raw)
  To: malc; +Cc: qemu-devel

On 12/18/2009 07:40 AM, malc wrote:
> After fixing a bug (crop was done after reading the cr) I ran some
> openssl speed benchmarks, and, at least here on an MPC7447A, got a
> speed degradation, tiny but consistent.

Well, you could try rendering the setcond with branches instead of 
logical operations.  You'll still gain the benefit of not having ended 
the TCG basic block, and forced the stores of globals to their slots etc 
etc.

> IN:
> 0x40082295:  movzbl (%eax),%eax
> 0x40082298:  cmp    $0x3d,%al
> 0x4008229a:  setne  %dl
> 0x4008229d:  test   %al,%al
> 0x4008229f:  je     0x400822d2
>
> OP after liveness analysis:
>   mov_i32 tmp2,eax
>   qemu_ld8u tmp0,tmp2,$0xffffffff
>   mov_i32 eax,tmp0
>   movi_i32 tmp1,$0x3d
>   mov_i32 tmp0,eax
>   nopn $0x2,$0x2
>   sub_i32 cc_dst,tmp0,tmp1
>   movi_i32 tmp13,$0xff
>   and_i32 tmp4,cc_dst,tmp13
>   movi_i32 tmp13,$0x0
>   setcond_i32 tmp0,tmp4,tmp13,ne
>   movi_i32 tmp14,$0xff
>   and_i32 tmp13,tmp0,tmp14
>
> ....
>
> OUT: [size=204]
> 0x601051b0:  lwz     r14,0(r27)
> 0x601051b4:  lbzx    r14,0,r14
> 0x601051b8:  mr      r15,r14
> 0x601051bc:  addi    r15,r15,-61
> 0x601051c0:  andi.   r15,r15,255
> 0x601051c4:  cmpwi   cr6,r15,0
> 0x601051c8:  crnot   4*cr7+eq,4*cr6+eq
> 0x601051cc:  mfcr    r0
> 0x601051d0:  rlwinm  r15,r0,31,31,31
> 0x601051d4:  andi.   r15,r15,255
>
> ...
>
> So the fact that setcond produces 0/1 was never communicated to the
> tcg, not that I would claim that it's possible at all...

It isn't.

And anyway, if you look at the opcodes generated without the setcond 
patch you'll see that and 255 in there as well.  Some more surgery on 
the i386 translator could probably get rid of that.  All I replaced were 
sequences of

   brcond c1,c2,$lab_true
   movi dest,0
   br $lab_over
   movi dest,1



r~

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 5/7] tcg-sparc: Implement setcond, movcond, setcond2, brcond2.
  2009-12-16 23:26 ` [Qemu-devel] [PATCH 5/7] tcg-sparc: Implement setcond, movcond, setcond2, brcond2 Richard Henderson
@ 2009-12-19 10:31   ` Blue Swirl
  2009-12-19 17:47     ` Richard Henderson
  0 siblings, 1 reply; 21+ messages in thread
From: Blue Swirl @ 2009-12-19 10:31 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Wed, Dec 16, 2009 at 11:26 PM, Richard Henderson <rth@twiddle.net> wrote:
> An initial cut at conditional moves for the sparc backend.
>
> Untested, as I don't have sparc hardware and the build system
> resists attempts at cross-compilation.

I can try if you have a test case.

> Note fixes to tcg_out_movi_imm32 (wrong check_fit_tl width),
> use of TCG_TARGET_REG_BITS == 64 tests instead of explicitly
> checking for __sparc_v9__ everywhere.

Good fixes. I think these should be in a different patch which could be applied.

> -    tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
> +    if (ret != arg)
> +        tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
>  }

This optimization is already handled at tcg-op.h:tcg_gen_mov_i32().

>  static inline void tcg_out_movi_imm32(TCGContext *s, int ret, uint32_t arg)
>  {
> -    if (check_fit_tl(arg, 12))
> +    if (check_fit_tl(arg, 13))
>         tcg_out_movi_imm13(s, ret, arg);

IIRC sign extension prevents this.

>  static inline void tcg_out_movi(TCGContext *s, TCGType type,
>                                 int ret, tcg_target_long arg)
>  {
> -#if defined(__sparc_v9__) && !defined(__sparc_v8plus__)
> -    if (!check_fit_tl(arg, 32) && (arg & ~0xffffffffULL) != 0) {
> -        tcg_out_movi_imm32(s, TCG_REG_I4, arg >> 32);
> -        tcg_out_arithi(s, TCG_REG_I4, TCG_REG_I4, 32, SHIFT_SLLX);
> +    if (type == TCG_TYPE_I32 || (arg & ~(tcg_target_long)0xffffffff))
>         tcg_out_movi_imm32(s, ret, arg);
> -        tcg_out_arith(s, ret, ret, TCG_REG_I4, ARITH_OR);
> -    } else if (check_fit_tl(arg, 12))
> -        tcg_out_movi_imm13(s, ret, arg);
> -    else {
> -        tcg_out_sethi(s, ret, arg);
> -        if (arg & 0x3ff)
> -            tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
> +    else if (TCG_TARGET_REG_BITS == 64) {
> +        if (check_fit_tl(arg, 32)) {
> +            /* Sign extended 32-bit constants are formed with SETHI+XOR.  */
> +            tcg_out_sethi(s, ret, ~arg);
> +            tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
> +        } else {
> +            tcg_out_movi_imm32(s, TCG_REG_I4, arg >> 32);
> +            tcg_out_arithi(s, TCG_REG_I4, TCG_REG_I4, 32, SHIFT_SLLX);
> +            tcg_out_movi_imm32(s, ret, arg);
> +            tcg_out_arith(s, ret, ret, TCG_REG_I4, ARITH_OR);
> +        }
>     }
> -#else
> -    tcg_out_movi_imm32(s, ret, arg);
> -#endif
>  }

Please split this also to another patch, it looks good.


> +        int32_t val = l->u.value - (tcg_target_long)s->code_ptr;
> +        tcg_out32(s, (INSN_OP(0) | opc | INSN_OP2(0x2)
>                       | INSN_OFF22(l->u.value - (unsigned long)s->code_ptr)));
>     } else {
>         tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP22, label_index, 0);
> -        tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2) | 0));
> +        tcg_out32(s, (INSN_OP(0) | opc | INSN_OP2(0x2) | 0));

What instruction is this? A define would be in order.

> -        tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) |
> -                      (0x5 << 19) |
> +        tcg_out32(s, (INSN_OP(0) | opc | INSN_OP2(0x1) | (0x5 << 19) |
>                       INSN_OFF19(l->u.value - (unsigned long)s->code_ptr)));
>     } else {
>         tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label_index, 0);
> -        tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) |
> -                      (0x5 << 19) | 0));
> +        tcg_out32(s, (INSN_OP(0) | opc | INSN_OP2(0x1) | (0x5 << 19) | 0));

Same here.

>  static void tcg_out_brcond_i32(TCGContext *s, int cond,
>                                TCGArg arg1, TCGArg arg2, int const_arg2,
>                                int label_index)
>  {
> -    if (const_arg2 && arg2 == 0)
> -        /* orcc %g0, r, %g0 */
> -        tcg_out_arith(s, TCG_REG_G0, TCG_REG_G0, arg1, ARITH_ORCC);
> -    else
> -        /* subcc r1, r2, %g0 */
> -        tcg_out_arith(s, TCG_REG_G0, arg1, arg2, ARITH_SUBCC);
> -    tcg_out_branch_i32(s, tcg_cond_to_bcond[cond], label_index);
> +    tcg_out_cmp(s, arg1, arg2, const_arg2);

What's wrong with 'orcc' (produces the synthetic instruction 'tst')?

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 5/7] tcg-sparc: Implement setcond, movcond, setcond2, brcond2.
  2009-12-19 10:31   ` Blue Swirl
@ 2009-12-19 17:47     ` Richard Henderson
  2009-12-19 21:25       ` Blue Swirl
  0 siblings, 1 reply; 21+ messages in thread
From: Richard Henderson @ 2009-12-19 17:47 UTC (permalink / raw)
  To: Blue Swirl; +Cc: qemu-devel

On 12/19/2009 02:31 AM, Blue Swirl wrote:
>>   static inline void tcg_out_movi_imm32(TCGContext *s, int ret, uint32_t arg)
>>   {
>> -    if (check_fit_tl(arg, 12))
>> +    if (check_fit_tl(arg, 13))
>>          tcg_out_movi_imm13(s, ret, arg);
>
> IIRC sign extension prevents this.

Pardon?  check_fit_tl checks a signed value, the OR opcode provides one. 
  Where's the conflict?

>> -    if (const_arg2&&  arg2 == 0)
>> -        /* orcc %g0, r, %g0 */
>> -        tcg_out_arith(s, TCG_REG_G0, TCG_REG_G0, arg1, ARITH_ORCC);
>> -    else
>> -        /* subcc r1, r2, %g0 */
>> -        tcg_out_arith(s, TCG_REG_G0, arg1, arg2, ARITH_SUBCC);
>> -    tcg_out_branch_i32(s, tcg_cond_to_bcond[cond], label_index);
>> +    tcg_out_cmp(s, arg1, arg2, const_arg2);
>
> What's wrong with 'orcc' (produces the synthetic instruction 'tst')?

What result does "orcc" give that isn't produced by "subcc"?  Unlike 
i386 where "test x,x" is one byte smaller than "cmp $0,x", it seems to 
me there's no reason to distinguish the arg2 == constant zero case on sparc.


r~

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 5/7] tcg-sparc: Implement setcond, movcond, setcond2, brcond2.
  2009-12-19 17:47     ` Richard Henderson
@ 2009-12-19 21:25       ` Blue Swirl
  2009-12-19 22:52         ` Richard Henderson
  0 siblings, 1 reply; 21+ messages in thread
From: Blue Swirl @ 2009-12-19 21:25 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Sat, Dec 19, 2009 at 5:47 PM, Richard Henderson <rth@twiddle.net> wrote:
> On 12/19/2009 02:31 AM, Blue Swirl wrote:
>>>
>>>  static inline void tcg_out_movi_imm32(TCGContext *s, int ret, uint32_t
>>> arg)
>>>  {
>>> -    if (check_fit_tl(arg, 12))
>>> +    if (check_fit_tl(arg, 13))
>>>         tcg_out_movi_imm13(s, ret, arg);
>>
>> IIRC sign extension prevents this.
>
> Pardon?  check_fit_tl checks a signed value, the OR opcode provides one.
>  Where's the conflict?

Long time ago I tried the same change, but the generated code was not
correct. But now it seems to work.

>>> -    if (const_arg2&&  arg2 == 0)
>>> -        /* orcc %g0, r, %g0 */
>>> -        tcg_out_arith(s, TCG_REG_G0, TCG_REG_G0, arg1, ARITH_ORCC);
>>> -    else
>>> -        /* subcc r1, r2, %g0 */
>>> -        tcg_out_arith(s, TCG_REG_G0, arg1, arg2, ARITH_SUBCC);
>>> -    tcg_out_branch_i32(s, tcg_cond_to_bcond[cond], label_index);
>>> +    tcg_out_cmp(s, arg1, arg2, const_arg2);
>>
>> What's wrong with 'orcc' (produces the synthetic instruction 'tst')?
>
> What result does "orcc" give that isn't produced by "subcc"?  Unlike i386
> where "test x,x" is one byte smaller than "cmp $0,x", it seems to me there's
> no reason to distinguish the arg2 == constant zero case on sparc.

Maybe it's faster on real CPUs. On my machine I don't see any
difference. I timed the following program:

#include <stdio.h>
#include <stdlib.h>

#define N 100000000

int main(int argc, char **argv)
{
    unsigned int i;

    if (atoi(argv[1])) {
        for (i = 0; i < N; i++) {
            asm volatile ("cmp %g1, 0");
        }
    } else {
        for (i = 0; i < N; i++) {
            asm volatile ("tst %g1");
        }
    }
    return 0;
}

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 5/7] tcg-sparc: Implement setcond, movcond, setcond2, brcond2.
  2009-12-19 21:25       ` Blue Swirl
@ 2009-12-19 22:52         ` Richard Henderson
  2009-12-20 11:06           ` Blue Swirl
  0 siblings, 1 reply; 21+ messages in thread
From: Richard Henderson @ 2009-12-19 22:52 UTC (permalink / raw)
  To: Blue Swirl; +Cc: qemu-devel

On 12/19/2009 01:25 PM, Blue Swirl wrote:
> On Sat, Dec 19, 2009 at 5:47 PM, Richard Henderson<rth@twiddle.net>  wrote:
>> On 12/19/2009 02:31 AM, Blue Swirl wrote:
>>>>
>>>>   static inline void tcg_out_movi_imm32(TCGContext *s, int ret, uint32_t
>>>> arg)
>>>>   {
>>>> -    if (check_fit_tl(arg, 12))
>>>> +    if (check_fit_tl(arg, 13))
>>>>          tcg_out_movi_imm13(s, ret, arg);
>>>
>>> IIRC sign extension prevents this.
>>
>> Pardon?  check_fit_tl checks a signed value, the OR opcode provides one.
>>   Where's the conflict?
>
> Long time ago I tried the same change, but the generated code was not
> correct. But now it seems to work.

I can't imagine why.  I've looked at the instruction encodings very 
carefully and double-checked the arithmetic vs GCC.

I still think the change is correct.  I can probably dig up access to a 
sparc machine and verify, but I don't know that would convince you.


r~

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH 5/7] tcg-sparc: Implement setcond, movcond, setcond2, brcond2.
  2009-12-19 22:52         ` Richard Henderson
@ 2009-12-20 11:06           ` Blue Swirl
  0 siblings, 0 replies; 21+ messages in thread
From: Blue Swirl @ 2009-12-20 11:06 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Sat, Dec 19, 2009 at 10:52 PM, Richard Henderson <rth@twiddle.net> wrote:
> On 12/19/2009 01:25 PM, Blue Swirl wrote:
>>
>> On Sat, Dec 19, 2009 at 5:47 PM, Richard Henderson<rth@twiddle.net>
>>  wrote:
>>>
>>> On 12/19/2009 02:31 AM, Blue Swirl wrote:
>>>>>
>>>>>  static inline void tcg_out_movi_imm32(TCGContext *s, int ret, uint32_t
>>>>> arg)
>>>>>  {
>>>>> -    if (check_fit_tl(arg, 12))
>>>>> +    if (check_fit_tl(arg, 13))
>>>>>         tcg_out_movi_imm13(s, ret, arg);
>>>>
>>>> IIRC sign extension prevents this.
>>>
>>> Pardon?  check_fit_tl checks a signed value, the OR opcode provides one.
>>>  Where's the conflict?
>>
>> Long time ago I tried the same change, but the generated code was not
>> correct. But now it seems to work.
>
> I can't imagine why.  I've looked at the instruction encodings very
> carefully and double-checked the arithmetic vs GCC.
>
> I still think the change is correct.  I can probably dig up access to a
> sparc machine and verify, but I don't know that would convince you.

No need for that. I tested it and now it seems to work, thanks.

There's another case a few lines later, I'll change both.

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2009-12-20 11:06 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-12-17  1:19 [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes Richard Henderson
2009-12-16  0:34 ` [Qemu-devel] [PATCH 1/7] tcg: Generic support for conditional set and conditional move Richard Henderson
2009-12-16  0:35 ` [Qemu-devel] [PATCH 2/7] tcg-amd64: Implement setcond and movcond Richard Henderson
2009-12-16  0:36 ` [Qemu-devel] [PATCH 3/7] target-alpha: Use setcond/movcond in integer compares and cmoves Richard Henderson
2009-12-16 23:17 ` [Qemu-devel] [PATCH 4/7] tcg-i386: Implement setcond, movcond, setcond2 Richard Henderson
2009-12-16 23:26 ` [Qemu-devel] [PATCH 5/7] tcg-sparc: Implement setcond, movcond, setcond2, brcond2 Richard Henderson
2009-12-19 10:31   ` Blue Swirl
2009-12-19 17:47     ` Richard Henderson
2009-12-19 21:25       ` Blue Swirl
2009-12-19 22:52         ` Richard Henderson
2009-12-20 11:06           ` Blue Swirl
2009-12-16 23:28 ` [Qemu-devel] [PATCH 6/7] target-i386: Use setcond and movcond Richard Henderson
2009-12-16 23:29 ` [Qemu-devel] [PATCH 7/7] target-mips: " Richard Henderson
2009-12-17 15:32 ` [Qemu-devel] [PATCH 0/7] tcg: conditional set and move opcodes malc
2009-12-17 15:37   ` Laurent Desnogues
2009-12-17 17:07   ` Richard Henderson
2009-12-17 17:47     ` malc
2009-12-17 18:09       ` Richard Henderson
2009-12-17 17:48     ` Richard Henderson
2009-12-18 15:40     ` malc
2009-12-18 16:05       ` Richard Henderson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.