All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/7] Define "deposit" tcg operation
@ 2011-01-07 22:42 Richard Henderson
  2011-01-07 22:42 ` [Qemu-devel] [PATCH 1/7] tcg: Define "deposit" as an optional operation Richard Henderson
                   ` (7 more replies)
  0 siblings, 8 replies; 24+ messages in thread
From: Richard Henderson @ 2011-01-07 22:42 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alexander Graf, Aurelien Jarno

Emulating i386 -- particularly in 16-bit mode -- requires quite a few
bitfield insert operations, to handle byte and word stores into the
dword registers.  On several hosts, this can be done natively, without
resorting to a sequence of AND and OR instructions.  Even i386 as a
host can do better than the naive approach, and not merely for the set
that's implementable with byte/word stores.

Examples from i386-on-amd64:

0x4080d274:  movzbw -0x1335(%ebx),%ax

	before:
	0x60219f5d:  movzbl 0x0(%r13),%ebp
	0x60219f62:  movzwl %bp,%ebp
	0x60219f65:  mov    (%r14),%ebx
	0x60219f68:  and    $0xffff0000,%ebx
	0x60219f6e:  or     %ebp,%ebx

	after:
	0x6021aa5d:  movzbl 0x0(%r13),%ebp
	0x6021aa62:  mov    (%r14),%ebx
	0x6021aa65:  mov    %bp,%bx

	Note that we were able to use the word store.

0x4080e259:  mov    %dl,%dh

	before:
	0x6021d035:  mov    %ebp,%ebx
	0x6021d037:  movzbl %bl,%ebx
	0x6021d03a:  shl    $0x8,%ebx
	0x6021d03d:  and    $0xffff00ff,%ebp
	0x6021d043:  or     %ebx,%ebp

	after:
	0x6021da95:  mov    %ebp,%ebx
	0x6021da97:  ror    $0x8,%ebp
	0x6021da9a:  shrd   $0x8,%ebx,%ebp
	0x6021da9e:  rol    $0x10,%ebp

	Note that the replacement is 1 insn and 4 bytes shorter.

Counts as seen in -d in_asm:
			   byte    word
	i386/ls:	    240      28
	fedora 12 boot:	  30938	  11459
	freedos boot:	   6936	  74803

Examples from ppc-on-amd64

0x4080add0:  rlwimi  r0,r25,30,0,1

	before:
	0x6027d886:  mov    0x64(%r14),%ebx
	0x6027d88a:  mov    %ebx,%r12d
	0x6027d88d:  rol    $0x1e,%r12d
	0x6027d891:  and    $0xc0000000,%r12d
	0x6027d898:  mov    (%r14),%r13d
	0x6027d89b:  and    $0x3fffffff,%r13d
	0x6027d8a2:  or     %r13d,%r12d

	after:
	0x6027e186:  mov    (%r14),%ebx
	0x6027e189:  mov    0x64(%r14),%r12d
	0x6027e18d:  ror    $0x1e,%ebx
	0x6027e190:  shrd   $0x2,%r12d,%ebx

Counts as seen in -d in_asm:
		  rlwimi
	ppc/ls:	       9
	(no ppc kernel in qemu.org downloads?)



r~



Richard Henderson (7):
  tcg: Define "deposit" as an optional operation.
  tcg-ppc: Implement deposit operation.
  tcg-hppa: Implement deposit operation.
  tcg-ia64: Implement deposit operation.
  tcg-i386: Implement deposit operation.
  target-i386: Use deposit operation.
  target-ppc: Use deposit operation.

 target-i386/translate.c |   34 +++--------------
 target-ppc/translate.c  |   10 +++++
 tcg/README              |   14 +++++++
 tcg/hppa/tcg-target.c   |   58 ++++++++++++++++++++++++++---
 tcg/hppa/tcg-target.h   |    1 +
 tcg/i386/tcg-target.c   |   68 +++++++++++++++++++++++++++++++++--
 tcg/i386/tcg-target.h   |    2 +
 tcg/ia64/tcg-target.c   |   92 +++++++++++++++++++++++++++++++++++++++++++++++
 tcg/ia64/tcg-target.h   |    2 +
 tcg/ppc/tcg-target.c    |   17 ++++++++-
 tcg/ppc/tcg-target.h    |    1 +
 tcg/tcg-op.h            |   40 ++++++++++++++++++++
 tcg/tcg-opc.h           |    6 +++
 tcg/tcg.c               |   15 ++++++++
 14 files changed, 322 insertions(+), 38 deletions(-)

-- 
1.7.2.3

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [Qemu-devel] [PATCH 1/7] tcg: Define "deposit" as an optional operation.
  2011-01-07 22:42 [Qemu-devel] [PATCH 0/7] Define "deposit" tcg operation Richard Henderson
@ 2011-01-07 22:42 ` Richard Henderson
  2011-01-07 23:48   ` Stuart Brady
  2011-01-09 21:38   ` Aurelien Jarno
  2011-01-07 22:42 ` [Qemu-devel] [PATCH 2/7] tcg-ppc: Implement deposit operation Richard Henderson
                   ` (6 subsequent siblings)
  7 siblings, 2 replies; 24+ messages in thread
From: Richard Henderson @ 2011-01-07 22:42 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alexander Graf, Aurelien Jarno

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/README    |   14 ++++++++++++++
 tcg/tcg-op.h  |   40 ++++++++++++++++++++++++++++++++++++++++
 tcg/tcg-opc.h |    6 ++++++
 tcg/tcg.c     |   13 +++++++++++++
 4 files changed, 73 insertions(+), 0 deletions(-)

diff --git a/tcg/README b/tcg/README
index 68d27ff..ef59070 100644
--- a/tcg/README
+++ b/tcg/README
@@ -285,6 +285,20 @@ the four high order bytes are set to zero.
 Indicate that the value of t0 won't be used later. It is useful to
 force dead code elimination.
 
+* deposit_i32/i64 dest, t1, t2, loc
+
+Deposit T2 as a bitfield into T1, placing the result in DEST.
+The bitfield is described by LOC, an immediate value:
+
+  bits 0:7  - the length of the bitfield
+  bits 8:15 - the position of the first bit
+
+For example, 0x101 indicates a 1-bit field at bit 1.
+This operation would be equivalent to
+
+  dest = (t1 & ~2) | ((t2 << 1) & 2)
+
+
 ********* Conditional moves
 
 * setcond_i32/i64 cond, dest, t1, t2
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 3ee0a58..c5a019a 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -2071,6 +2071,44 @@ static inline void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
     }
 }
 
+static inline void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1,
+				       TCGv_i32 arg2, unsigned int ofs,
+				       unsigned int len)
+{ /* ret = arg1 with the LEN-bit field at bit OFS replaced by arg2's low bits */
+#ifdef TCG_TARGET_HAS_deposit_i32
+  tcg_gen_op4i_i32(INDEX_op_deposit_i32, ret, arg1, arg2, (ofs << 8) | len);
+#else
+  uint32_t mask = len < 32 ? (1u << len) - 1 : ~0u;
+  TCGv_i32 t1 = tcg_temp_new_i32 ();
+  /* The guard above avoids the undefined 1u << 32 for a full-width field.  */
+  tcg_gen_andi_i32(t1, arg2, mask);
+  tcg_gen_shli_i32(t1, t1, ofs);
+  tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
+  tcg_gen_or_i32(ret, ret, t1);
+
+  tcg_temp_free_i32(t1);
+#endif
+}
+
+static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1,
+				       TCGv_i64 arg2, unsigned int ofs,
+				       unsigned int len)
+{ /* ret = arg1 with the LEN-bit field at bit OFS replaced by arg2's low bits */
+#ifdef TCG_TARGET_HAS_deposit_i64
+  tcg_gen_op4i_i64(INDEX_op_deposit_i64, ret, arg1, arg2, (ofs << 8) | len);
+#else
+  uint64_t mask = len < 64 ? (1ull << len) - 1 : ~0ull;
+  TCGv_i64 t1 = tcg_temp_new_i64 ();
+  /* The guard above avoids the undefined 1ull << 64 for a full-width field.  */
+  tcg_gen_andi_i64(t1, arg2, mask);
+  tcg_gen_shli_i64(t1, t1, ofs);
+  tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
+  tcg_gen_or_i64(ret, ret, t1);
+
+  tcg_temp_free_i64(t1);
+#endif
+}
+
 /***************************************/
 /* QEMU specific operations. Their type depend on the QEMU CPU
    type. */
@@ -2384,6 +2422,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_gen_rotli_tl tcg_gen_rotli_i64
 #define tcg_gen_rotr_tl tcg_gen_rotr_i64
 #define tcg_gen_rotri_tl tcg_gen_rotri_i64
+#define tcg_gen_deposit_tl tcg_gen_deposit_i64
 #define tcg_const_tl tcg_const_i64
 #define tcg_const_local_tl tcg_const_local_i64
 #else
@@ -2454,6 +2493,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_gen_rotli_tl tcg_gen_rotli_i32
 #define tcg_gen_rotr_tl tcg_gen_rotr_i32
 #define tcg_gen_rotri_tl tcg_gen_rotri_i32
+#define tcg_gen_deposit_tl tcg_gen_deposit_i32
 #define tcg_const_tl tcg_const_i32
 #define tcg_const_local_tl tcg_const_local_i32
 #endif
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 2a98fed..ded6311 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -78,6 +78,9 @@ DEF(sar_i32, 1, 2, 0, 0)
 DEF(rotl_i32, 1, 2, 0, 0)
 DEF(rotr_i32, 1, 2, 0, 0)
 #endif
+#ifdef TCG_TARGET_HAS_deposit_i32
+DEF(deposit_i32, 1, 2, 1, 0)
+#endif
 
 DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #if TCG_TARGET_REG_BITS == 32
@@ -168,6 +171,9 @@ DEF(sar_i64, 1, 2, 0, 0)
 DEF(rotl_i64, 1, 2, 0, 0)
 DEF(rotr_i64, 1, 2, 0, 0)
 #endif
+#ifdef TCG_TARGET_HAS_deposit_i64
+DEF(deposit_i64, 1, 2, 1, 0)
+#endif
 
 DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #ifdef TCG_TARGET_HAS_ext8s_i64
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 5dd6a2c..e95a42f 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -956,6 +956,19 @@ void tcg_dump_ops(TCGContext *s, FILE *outfile)
                     fprintf(outfile, ",$0x%" TCG_PRIlx, args[k++]);
                 i = 1;
                 break;
+#if defined(TCG_TARGET_HAS_deposit_i32) || defined(TCG_TARGET_HAS_deposit_i64)
+# ifdef TCG_TARGET_HAS_deposit_i32
+            case INDEX_op_deposit_i32:
+# endif
+# ifdef TCG_TARGET_HAS_deposit_i64
+            case INDEX_op_deposit_i64:
+# endif
+                fprintf(outfile, ",%u,%u", (unsigned)args[k] >> 8,
+                        (unsigned)args[k] & 0xff);
+                k++;
+                i = 1;
+                break;
+#endif
             default:
                 i = 0;
                 break;
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [Qemu-devel] [PATCH 2/7] tcg-ppc: Implement deposit operation.
  2011-01-07 22:42 [Qemu-devel] [PATCH 0/7] Define "deposit" tcg operation Richard Henderson
  2011-01-07 22:42 ` [Qemu-devel] [PATCH 1/7] tcg: Define "deposit" as an optional operation Richard Henderson
@ 2011-01-07 22:42 ` Richard Henderson
  2011-01-07 23:33   ` [Qemu-devel] " malc
  2011-01-07 22:42 ` [Qemu-devel] [PATCH 3/7] tcg-hppa: " Richard Henderson
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 24+ messages in thread
From: Richard Henderson @ 2011-01-07 22:42 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alexander Graf, Aurelien Jarno

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ppc/tcg-target.c |   17 ++++++++++++++++-
 tcg/ppc/tcg-target.h |    1 +
 2 files changed, 17 insertions(+), 1 deletions(-)

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 7970268..2947d1b 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -1611,6 +1611,21 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         }
         break;
 
+    case INDEX_op_deposit_i32:
+        {
+            unsigned len = args[3] & 0xff;
+            unsigned lsb_ofs = (args[3] >> 8) & 31;
+            unsigned msb_ofs = (32 - lsb_ofs - len) & 31;
+            /* msb_ofs: field's top bit in rlwimi numbering (bit 0 = MSB).  */
+            tcg_out32 (s, RLWIMI
+                       | RA(args[0])
+                       | RS(args[2])
+                       | SH((32 - msb_ofs - len) & 31)
+                       | MB(msb_ofs)
+                       | ME((msb_ofs + len - 1) & 31));
+        }
+        break;
+
     case INDEX_op_add2_i32:
         if (args[0] == args[3] || args[0] == args[5]) {
             tcg_out32 (s, ADDC | TAB (0, args[2], args[4]));
@@ -1829,9 +1844,9 @@ static const TCGTargetOpDef ppc_op_defs[] = {
     { INDEX_op_shl_i32, { "r", "r", "ri" } },
     { INDEX_op_shr_i32, { "r", "r", "ri" } },
     { INDEX_op_sar_i32, { "r", "r", "ri" } },
-
     { INDEX_op_rotl_i32, { "r", "r", "ri" } },
     { INDEX_op_rotr_i32, { "r", "r", "ri" } },
+    { INDEX_op_deposit_i32, { "r", "0", "r" } },
 
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index a1f8599..bbf38d5 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -92,6 +92,7 @@ enum {
 #define TCG_TARGET_HAS_eqv_i32
 #define TCG_TARGET_HAS_nand_i32
 #define TCG_TARGET_HAS_nor_i32
+#define TCG_TARGET_HAS_deposit_i32
 
 #define TCG_AREG0 TCG_REG_R27
 
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [Qemu-devel] [PATCH 3/7] tcg-hppa: Implement deposit operation.
  2011-01-07 22:42 [Qemu-devel] [PATCH 0/7] Define "deposit" tcg operation Richard Henderson
  2011-01-07 22:42 ` [Qemu-devel] [PATCH 1/7] tcg: Define "deposit" as an optional operation Richard Henderson
  2011-01-07 22:42 ` [Qemu-devel] [PATCH 2/7] tcg-ppc: Implement deposit operation Richard Henderson
@ 2011-01-07 22:42 ` Richard Henderson
  2011-01-07 23:35   ` Stuart Brady
  2011-01-07 22:43 ` [Qemu-devel] [PATCH 4/7] tcg-ia64: " Richard Henderson
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 24+ messages in thread
From: Richard Henderson @ 2011-01-07 22:42 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alexander Graf, Aurelien Jarno

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/hppa/tcg-target.c |   58 +++++++++++++++++++++++++++++++++++++++++++-----
 tcg/hppa/tcg-target.h |    1 +
 2 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index 7f4653e..2c5df57 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -467,6 +467,22 @@ static inline void tcg_out_dep(TCGContext *s, int ret, int arg,
               | INSN_SHDEP_CP(31 - ofs) | INSN_DEP_LEN(len));
 }
 
+static inline void tcg_out_depi(TCGContext *s, int ret, int val,
+                                unsigned ofs, unsigned len)
+{ /* Emit INSN_DEPI on RET; VAL is encoded through INSN_IM5, field is OFS/LEN. */
+    assert(ofs < 32 && len <= 32 - ofs);
+    tcg_out32(s, INSN_DEPI | INSN_R2(ret) | INSN_IM5(val)
+              | INSN_SHDEP_CP(31 - ofs) | INSN_DEP_LEN(len));
+}
+
+static inline void tcg_out_zdep(TCGContext *s, int ret, int arg,
+                                unsigned ofs, unsigned len)
+{
+    assert(ofs < 32 && len <= 32 - ofs);
+    tcg_out32(s, INSN_ZDEP | INSN_R2(ret) | INSN_R1(arg)
+              | INSN_SHDEP_CP(31 - ofs) | INSN_DEP_LEN(len));
+}
+
 static inline void tcg_out_shd(TCGContext *s, int ret, int hi, int lo,
                                unsigned count)
 {
@@ -499,8 +515,7 @@ static void tcg_out_ori(TCGContext *s, int ret, int arg, tcg_target_ulong m)
     assert(bs1 == 32 || (1ul << bs1) > m);
 
     tcg_out_mov(s, TCG_TYPE_I32, ret, arg);
-    tcg_out32(s, INSN_DEPI | INSN_R2(ret) | INSN_IM5(-1)
-              | INSN_SHDEP_CP(31 - bs0) | INSN_DEP_LEN(bs1 - bs0));
+    tcg_out_depi(s, ret, -1, bs0, bs1 - bs0);
 }
 
 static void tcg_out_andi(TCGContext *s, int ret, int arg, tcg_target_ulong m)
@@ -529,8 +544,7 @@ static void tcg_out_andi(TCGContext *s, int ret, int arg, tcg_target_ulong m)
         tcg_out_extr(s, ret, arg, 0, ls0, 0);
     } else {
         tcg_out_mov(s, TCG_TYPE_I32, ret, arg);
-        tcg_out32(s, INSN_DEPI | INSN_R2(ret) | INSN_IM5(0)
-                  | INSN_SHDEP_CP(31 - ls0) | INSN_DEP_LEN(ls1 - ls0));
+        tcg_out_depi(s, ret, 0, ls0, ls1 - ls0);
     }
 }
 
@@ -547,8 +561,7 @@ static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg)
 static void tcg_out_shli(TCGContext *s, int ret, int arg, int count)
 {
     count &= 31;
-    tcg_out32(s, INSN_ZDEP | INSN_R2(ret) | INSN_R1(arg)
-              | INSN_SHDEP_CP(31 - count) | INSN_DEP_LEN(32 - count));
+    tcg_out_zdep(s, ret, arg, count, 32 - count);
 }
 
 static void tcg_out_shl(TCGContext *s, int ret, int arg, int creg)
@@ -1407,6 +1420,38 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         }
         break;
 
+    case INDEX_op_deposit_i32:
+        {
+            unsigned ofs = args[3] >> 8, len = args[3] & 0xff;
+            int arg2 = args[2];
+            int arg1 = args[1];
+            int arg0 = args[0];
+
+            if (const_args[1]) {
+                if (const_args[2]) {
+                    tcg_out_movi(s, TCG_TYPE_I32, arg0,
+                                 (arg2 & ((1u << len) - 1)) << ofs);
+                } else {
+                    tcg_out_zdep(s, arg0, arg2, ofs, len);
+                }
+            } else {
+                if (const_args[2]) {
+                    tcg_out_mov(s, TCG_TYPE_I32, arg0, arg1);
+                    tcg_out_depi(s, arg0, arg2, ofs, len);
+                } else {
+                    if (arg0 != arg1) {
+                        if (arg0 == arg2) {
+                            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R20, arg2);
+                            arg2 = TCG_REG_R20;
+                        }
+                        tcg_out_mov(s, TCG_TYPE_I32, arg0, arg1);
+                    }
+                    tcg_out_dep(s, arg0, arg2, ofs, len);
+                }
+            }
+        }
+        break;
+
     case INDEX_op_mul_i32:
         tcg_out_xmpyu(s, args[0], TCG_REG_R0, args[1], args[2]);
         break;
@@ -1534,6 +1579,7 @@ static const TCGTargetOpDef hppa_op_defs[] = {
     { INDEX_op_sar_i32, { "r", "r", "ri" } },
     { INDEX_op_rotl_i32, { "r", "r", "ri" } },
     { INDEX_op_rotr_i32, { "r", "r", "ri" } },
+    { INDEX_op_deposit_i32, { "r", "rZ", "rJ" } },
 
     { INDEX_op_bswap16_i32, { "r", "r" } },
     { INDEX_op_bswap32_i32, { "r", "r" } },
diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h
index a5cc440..d3fe075 100644
--- a/tcg/hppa/tcg-target.h
+++ b/tcg/hppa/tcg-target.h
@@ -87,6 +87,7 @@ enum {
 /* optional instructions */
 // #define TCG_TARGET_HAS_div_i32
 #define TCG_TARGET_HAS_rot_i32
+#define TCG_TARGET_HAS_deposit_i32
 #define TCG_TARGET_HAS_ext8s_i32
 #define TCG_TARGET_HAS_ext16s_i32
 #define TCG_TARGET_HAS_bswap16_i32
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [Qemu-devel] [PATCH 4/7] tcg-ia64: Implement deposit operation.
  2011-01-07 22:42 [Qemu-devel] [PATCH 0/7] Define "deposit" tcg operation Richard Henderson
                   ` (2 preceding siblings ...)
  2011-01-07 22:42 ` [Qemu-devel] [PATCH 3/7] tcg-hppa: " Richard Henderson
@ 2011-01-07 22:43 ` Richard Henderson
  2011-01-09 22:04   ` [Qemu-devel] " Aurelien Jarno
  2011-01-07 22:43 ` [Qemu-devel] [PATCH 5/7] tcg-i386: " Richard Henderson
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 24+ messages in thread
From: Richard Henderson @ 2011-01-07 22:43 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alexander Graf, Aurelien Jarno

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c |   92 +++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/ia64/tcg-target.h |    2 +
 2 files changed, 94 insertions(+), 0 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 3ddf434..2708d55 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -237,6 +237,7 @@ enum {
     OPC_CMP4_LT_A6            = 0x18400000000ull,
     OPC_CMP4_LTU_A6           = 0x1a400000000ull,
     OPC_CMP4_EQ_A6            = 0x1c400000000ull,
+    OPC_DEP_I15               = 0x08000000000ull,
     OPC_DEP_Z_I12             = 0x0a600000000ull,
     OPC_EXTR_I11              = 0x0a400002000ull,
     OPC_EXTR_U_I11            = 0x0a400000000ull,
@@ -508,6 +509,19 @@ static inline uint64_t tcg_opc_i12(int qp, uint64_t opc, int r1,
            | (qp & 0x3f);
 }
 
+static inline uint64_t tcg_opc_i15(int qp, uint64_t opc, int r1,
+                                   int r2, int r3, uint64_t len,
+                                   uint64_t cpos)
+{ /* OR the operand fields into OPC: cpos 6 bits, len 4 bits, r1-r3 7 bits. */
+    return opc
+           | ((cpos & 0x3f) << 31)
+           | ((len & 0x0f) << 27)
+           | ((r3 & 0x7f) << 20)
+           | ((r2 & 0x7f) << 13)
+           | ((r1 & 0x7f) << 6)
+           | (qp & 0x3f);
+}
+
 static inline uint64_t tcg_opc_i18(int qp, uint64_t opc, uint64_t imm)
 {
     return opc
@@ -1335,6 +1349,73 @@ static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg)
                    tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, arg, 0xb));
 }
 
+static void tcg_out_deposit_i32(TCGContext *s, TCGArg out, TCGArg in,
+                                TCGArg val, unsigned ofs, unsigned len)
+{
+    uint64_t nop_m = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+
+    if (val == 0) {
+        tcg_out_bundle(s, mmI, nop_m, nop_m,
+                       tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, out, in,
+                                   len - 1, 63 - ofs));
+    } else if (len <= 16) {
+        tcg_out_bundle(s, mmI, nop_m, nop_m,
+                       tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, out, in, val,
+                                   len - 1, 63 - ofs));
+    } else {
+        tcg_out_bundle(s, miI, nop_m,
+                       tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2,
+                                   val, 16, 31 - 16),
+                       tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, out, in, val,
+                                   16 - 1, 63 - ofs));
+        tcg_out_bundle(s, mmI, nop_m, nop_m,
+                       tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, out, out,
+                                   TCG_REG_R2, len - 16 - 1, 63 - (ofs + 16)));
+    }
+}
+
+static void tcg_out_deposit_i64(TCGContext *s, TCGArg out, TCGArg in,
+                                TCGArg val, unsigned ofs, unsigned len)
+{
+    uint64_t nop_m = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+
+    if (val == 0) {
+        tcg_out_bundle(s, mmI, nop_m, nop_m,
+                       tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, out, in,
+                                   len - 1, 63 - ofs));
+    } else if (len <= 16) {
+        tcg_out_bundle(s, mmI, nop_m, nop_m,
+                       tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, out, in, val,
+                                   len - 1, 63 - ofs));
+    } else {
+        uint64_t ror = 0, shrp, rol = 0;
+
+        if (ofs) {
+	    ror = tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, TCG_REG_R2,
+                              in, in, ofs);
+            in = TCG_REG_R2;
+        }
+
+        shrp = tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, out, in, val, len);
+
+        ofs = (ofs - len) & 63;
+        if (ofs) {
+            rol = tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, out, out, out, 64-ofs);
+        }
+
+        if (ror) {
+            tcg_out_bundle(s, mII, nop_m, ror, shrp);
+            if (rol) {
+                tcg_out_bundle(s, mmI, nop_m, nop_m, rol);
+            }
+        } else if (rol) {
+            tcg_out_bundle(s, mII, nop_m, shrp, rol);
+        } else {
+            tcg_out_bundle(s, mmI, nop_m, nop_m, shrp);
+        }
+    }
+}
+
 static inline uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1,
                                      TCGArg arg2, int cmp4)
 {
@@ -2063,6 +2144,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_rotr_i64(s, args[0], args[1], args[2], const_args[2]);
         break;
 
+    case INDEX_op_deposit_i32:
+        tcg_out_deposit_i32(s, args[0], args[1], args[2],
+                            (args[3] >> 8) & 31, args[3] & 31);
+        break;
+    case INDEX_op_deposit_i64:
+        tcg_out_deposit_i64(s, args[0], args[1], args[2],
+                            (args[3] >> 8) & 63, args[3] & 63);
+        break;
+
     case INDEX_op_ext8s_i32:
     case INDEX_op_ext8s_i64:
         tcg_out_ext(s, OPC_SXT1_I29, args[0], args[1]);
@@ -2192,6 +2282,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_shr_i32, { "r", "rZ", "ri" } },
     { INDEX_op_rotl_i32, { "r", "rZ", "ri" } },
     { INDEX_op_rotr_i32, { "r", "rZ", "ri" } },
+    { INDEX_op_deposit_i32, { "r", "rZ", "rZ" } },
 
     { INDEX_op_ext8s_i32, { "r", "rZ"} },
     { INDEX_op_ext8u_i32, { "r", "rZ"} },
@@ -2238,6 +2329,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_shr_i64, { "r", "rZ", "ri" } },
     { INDEX_op_rotl_i64, { "r", "rZ", "ri" } },
     { INDEX_op_rotr_i64, { "r", "rZ", "ri" } },
+    { INDEX_op_deposit_i64, { "r", "rZ", "rZ" } },
 
     { INDEX_op_ext8s_i64, { "r", "rZ"} },
     { INDEX_op_ext8u_i64, { "r", "rZ"} },
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index e56e88f..80e3534 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -131,6 +131,8 @@ enum {
 #define TCG_TARGET_HAS_orc_i64
 #define TCG_TARGET_HAS_rot_i32
 #define TCG_TARGET_HAS_rot_i64
+#define TCG_TARGET_HAS_deposit_i32
+#define TCG_TARGET_HAS_deposit_i64
 
 /* optional instructions automatically implemented */
 #undef TCG_TARGET_HAS_neg_i32   /* sub r1, r0, r3 */
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [Qemu-devel] [PATCH 5/7] tcg-i386: Implement deposit operation.
  2011-01-07 22:42 [Qemu-devel] [PATCH 0/7] Define "deposit" tcg operation Richard Henderson
                   ` (3 preceding siblings ...)
  2011-01-07 22:43 ` [Qemu-devel] [PATCH 4/7] tcg-ia64: " Richard Henderson
@ 2011-01-07 22:43 ` Richard Henderson
  2011-01-09 21:53   ` [Qemu-devel] " Aurelien Jarno
  2011-01-07 22:43 ` [Qemu-devel] [PATCH 6/7] target-i386: Use " Richard Henderson
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 24+ messages in thread
From: Richard Henderson @ 2011-01-07 22:43 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alexander Graf, Aurelien Jarno

Special case deposits that are implementable with byte and word stores.
Otherwise implement with double-word shift plus rotates.

Expose tcg_reg_alloc to the backend for allocation of scratch registers.
There's an edge condition that cannot actually happen at the moment due
to a bug elsewhere in the register allocator, but it doesn't seem right
to leave that unfixed.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |   68 ++++++++++++++++++++++++++++++++++++++++++++++--
 tcg/i386/tcg-target.h |    2 +
 tcg/tcg.c             |    2 +
 3 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index bb19a95..cc7d266 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -258,7 +258,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_JMP_long	(0xe9)
 #define OPC_JMP_short	(0xeb)
 #define OPC_LEA         (0x8d)
-#define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
+#define OPC_MOVB_EbGb	(0x88)		/* stores, more or less */
+#define OPC_MOVB_GbEb   (0x8a)		/* loads, more or less */
 #define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
 #define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
 #define OPC_MOVL_EvIz	(0xc7)
@@ -277,6 +278,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_SHIFT_1	(0xd1)
 #define OPC_SHIFT_Ib	(0xc1)
 #define OPC_SHIFT_cl	(0xd3)
+#define OPC_SHRD_Ib	(0xac | P_EXT)
 #define OPC_TESTL	(0x85)
 #define OPC_XCHG_ax_r32	(0x90)
 
@@ -710,6 +712,59 @@ static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
     }
 }
 
+static void tcg_out_deposit(TCGContext *s, int inout, int val,
+                            unsigned ofs, unsigned len, int rexw)
+{
+    /* Look for MOVW special case.  */
+    if (ofs == 0 && len == 16) {
+        tcg_out_modrm(s, OPC_MOVL_GvEv + P_DATA16, inout, val);
+        return;
+    }
+
+    /* Look for MOVB w/ %reg_l special case.  */
+    if (ofs == 0 && len == 8
+        && (TCG_TARGET_REG_BITS == 64 || (inout < 4 && val < 4))) {
+        tcg_out_modrm(s, OPC_MOVB_GbEb + P_REXB_R + P_REXB_RM, inout, val);
+        return;
+    }
+
+    /* Look for MOVB w/ %reg_h special case.  */
+    if (ofs == 8 && len == 8 && inout < 4 && val < 4) {
+        tcg_out_modrm(s, OPC_MOVB_GbEb, inout + 4, val);
+        return;
+    }
+
+    /* If we have a real deposit from self, we need a temporary.  */
+    /* ??? There really ought to be a way to easily allocate a scratch.  */
+    if (inout == val) {
+        TCGType type = rexw ? TCG_TYPE_I64 : TCG_TYPE_I32;
+        TCGRegSet inuse = s->reserved_regs;
+
+        tcg_regset_set_reg(inuse, inout);
+        val = tcg_reg_alloc(s, tcg_target_available_regs[type], inuse);
+
+        tcg_out_mov(s, type, val, inout);
+    }
+
+    /* Arrange for the field to be at offset 0.  */
+    if (ofs != 0) {
+        tcg_out_shifti(s, SHIFT_ROR + rexw, inout, ofs);
+    }
+
+    /* Shift the value into the top of the word.  This shifts the old
+       field out of the bottom of the word and leaves us with the whole
+       word rotated right by the size of the field.  */
+    tcg_out_modrm(s, OPC_SHRD_Ib + rexw, val, inout);
+    tcg_out8(s, len);
+
+    /* Restore the field to its proper location.  */
+    ofs = (len + ofs) & (rexw ? 63 : 31);
+    if (ofs != 0) {
+        tcg_out_shifti(s, SHIFT_ROL + rexw, inout, ofs);
+    }
+}
+
+
 /* Use SMALL != 0 to force a short forward branch.  */
 static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
 {
@@ -1266,7 +1321,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
 
     switch (sizeop) {
     case 0:
-        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
+        tcg_out_modrm_offset(s, OPC_MOVB_EbGb + P_REXB_R, datalo, base, ofs);
         break;
     case 1:
         if (bswap) {
@@ -1504,7 +1559,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     OP_32_64(st8):
-        tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
+        tcg_out_modrm_offset(s, OPC_MOVB_EbGb | P_REXB_R,
                              args[0], args[1], args[2]);
         break;
     OP_32_64(st16):
@@ -1603,6 +1658,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         }
         break;
 
+    OP_32_64(deposit):
+        tcg_out_deposit(s, args[0], args[2],
+                        args[3] >> 8, args[3] & 0xff, rexw);
+        break;
+
     case INDEX_op_brcond_i32:
         tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                          args[3], 0);
@@ -1783,6 +1843,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_sar_i32, { "r", "0", "ci" } },
     { INDEX_op_rotl_i32, { "r", "0", "ci" } },
     { INDEX_op_rotr_i32, { "r", "0", "ci" } },
+    { INDEX_op_deposit_i32, { "r", "0", "r" } },
 
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
@@ -1835,6 +1896,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_sar_i64, { "r", "0", "ci" } },
     { INDEX_op_rotl_i64, { "r", "0", "ci" } },
     { INDEX_op_rotr_i64, { "r", "0", "ci" } },
+    { INDEX_op_deposit_i64, { "r", "0", "r" } },
 
     { INDEX_op_brcond_i64, { "r", "re" } },
     { INDEX_op_setcond_i64, { "r", "r", "re" } },
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index bfafbfc..9f90d17 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -77,6 +77,7 @@ enum {
 /* optional instructions */
 #define TCG_TARGET_HAS_div2_i32
 #define TCG_TARGET_HAS_rot_i32
+#define TCG_TARGET_HAS_deposit_i32
 #define TCG_TARGET_HAS_ext8s_i32
 #define TCG_TARGET_HAS_ext16s_i32
 #define TCG_TARGET_HAS_ext8u_i32
@@ -94,6 +95,7 @@ enum {
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div2_i64
 #define TCG_TARGET_HAS_rot_i64
+#define TCG_TARGET_HAS_deposit_i64
 #define TCG_TARGET_HAS_ext8s_i64
 #define TCG_TARGET_HAS_ext16s_i64
 #define TCG_TARGET_HAS_ext32s_i64
diff --git a/tcg/tcg.c b/tcg/tcg.c
index e95a42f..5ab9122 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -156,6 +156,8 @@ int gen_new_label(void)
     return idx;
 }
 
+static int tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2);
+
 #include "tcg-target.c"
 
 /* pool based memory allocation */
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [Qemu-devel] [PATCH 6/7] target-i386: Use deposit operation.
  2011-01-07 22:42 [Qemu-devel] [PATCH 0/7] Define "deposit" tcg operation Richard Henderson
                   ` (4 preceding siblings ...)
  2011-01-07 22:43 ` [Qemu-devel] [PATCH 5/7] tcg-i386: " Richard Henderson
@ 2011-01-07 22:43 ` Richard Henderson
  2011-01-07 22:43 ` [Qemu-devel] [PATCH 7/7] target-ppc: " Richard Henderson
  2011-01-07 23:10 ` [Qemu-devel] [PATCH 0/7] Define "deposit" tcg operation Peter Maydell
  7 siblings, 0 replies; 24+ messages in thread
From: Richard Henderson @ 2011-01-07 22:43 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alexander Graf, Aurelien Jarno

Use this for assignment to the low byte or low word of a register.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-i386/translate.c |   34 ++++++----------------------------
 1 files changed, 6 insertions(+), 28 deletions(-)

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 7b6e3c2..c008450 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -274,28 +274,16 @@ static inline void gen_op_andl_A0_ffff(void)
 
 static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
 {
-    TCGv tmp;
-
     switch(ot) {
     case OT_BYTE:
-        tmp = tcg_temp_new();
-        tcg_gen_ext8u_tl(tmp, t0);
         if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
-            tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xff);
-            tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
+            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
         } else {
-            tcg_gen_shli_tl(tmp, tmp, 8);
-            tcg_gen_andi_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], ~0xff00);
-            tcg_gen_or_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], tmp);
+            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
         }
-        tcg_temp_free(tmp);
         break;
     case OT_WORD:
-        tmp = tcg_temp_new();
-        tcg_gen_ext16u_tl(tmp, t0);
-        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
-        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
-        tcg_temp_free(tmp);
+        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
         break;
     default: /* XXX this shouldn't be reached;  abort? */
     case OT_LONG:
@@ -323,15 +311,9 @@ static inline void gen_op_mov_reg_T1(int ot, int reg)
 
 static inline void gen_op_mov_reg_A0(int size, int reg)
 {
-    TCGv tmp;
-
     switch(size) {
     case 0:
-        tmp = tcg_temp_new();
-        tcg_gen_ext16u_tl(tmp, cpu_A0);
-        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
-        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
-        tcg_temp_free(tmp);
+        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_A0, 0, 16);
         break;
     default: /* XXX this shouldn't be reached;  abort? */
     case 1:
@@ -415,9 +397,7 @@ static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
     switch(size) {
     case 0:
         tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
-        tcg_gen_ext16u_tl(cpu_tmp0, cpu_tmp0);
-        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
-        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0);
+        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0, 0, 16);
         break;
     case 1:
         tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
@@ -439,9 +419,7 @@ static inline void gen_op_add_reg_T0(int size, int reg)
     switch(size) {
     case 0:
         tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
-        tcg_gen_ext16u_tl(cpu_tmp0, cpu_tmp0);
-        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
-        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0);
+        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0, 0, 16);
         break;
     case 1:
         tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [Qemu-devel] [PATCH 7/7] target-ppc: Use deposit operation.
  2011-01-07 22:42 [Qemu-devel] [PATCH 0/7] Define "deposit" tcg operation Richard Henderson
                   ` (5 preceding siblings ...)
  2011-01-07 22:43 ` [Qemu-devel] [PATCH 6/7] target-i386: Use " Richard Henderson
@ 2011-01-07 22:43 ` Richard Henderson
  2011-01-07 23:10 ` [Qemu-devel] [PATCH 0/7] Define "deposit" tcg operation Peter Maydell
  7 siblings, 0 replies; 24+ messages in thread
From: Richard Henderson @ 2011-01-07 22:43 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alexander Graf, Aurelien Jarno

Use this in implementing rl[wd]imi, at least for the cases
that don't require true rotation.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-ppc/translate.c |   10 ++++++++++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 74e06d7..f45c0ec 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1516,6 +1516,11 @@ static void gen_rlwimi(DisasContext *ctx)
     sh = SH(ctx->opcode);
     if (likely(sh == 0 && mb == 0 && me == 31)) {
         tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
+    } else if ((31 - me) == sh && mb <= me) {
+        /* This is a well-behaved bitfield deposit.  */
+        tcg_gen_deposit_tl (cpu_gpr[rA(ctx->opcode)],
+                            cpu_gpr[rA(ctx->opcode)],
+                            cpu_gpr[rS(ctx->opcode)], sh, me - mb + 1);
     } else {
         target_ulong mask;
         TCGv t1;
@@ -1761,6 +1766,11 @@ static inline void gen_rldimi(DisasContext *ctx, int mbn, int shn)
     me = 63 - sh;
     if (unlikely(sh == 0 && mb == 0)) {
         tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
+    } else if (mb <= me) {
+        /* This is a well-behaved bitfield deposit.  */
+        tcg_gen_deposit_tl (cpu_gpr[rA(ctx->opcode)],
+                            cpu_gpr[rA(ctx->opcode)],
+                            cpu_gpr[rS(ctx->opcode)], sh, me - mb + 1);
     } else {
         TCGv t0, t1;
         target_ulong mask;
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] [PATCH 0/7] Define "deposit" tcg operation
  2011-01-07 22:42 [Qemu-devel] [PATCH 0/7] Define "deposit" tcg operation Richard Henderson
                   ` (6 preceding siblings ...)
  2011-01-07 22:43 ` [Qemu-devel] [PATCH 7/7] target-ppc: " Richard Henderson
@ 2011-01-07 23:10 ` Peter Maydell
  2011-01-07 23:37   ` Richard Henderson
  7 siblings, 1 reply; 24+ messages in thread
From: Peter Maydell @ 2011-01-07 23:10 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, Aurelien Jarno, Alexander Graf

On 7 January 2011 22:42, Richard Henderson <rth@twiddle.net> wrote:
> Emulating i386 -- particularly in 16-bit mode -- requires quite a few
> bitfield insert operations, to handle byte and word stores into the
> dword registers.  On several hosts, this can be done natively, without
> resorting to a sequence of and and or instructions.

+* deposit_i32/i64 dest, t1, t2, loc
+
+Deposit T2 as a bitfield into T1, placing the result in DEST.
+The bitfield is described by LOC, an immediate value:
+
+  bits 0:7  - the length of the bitfield
+  bits 8:15 - the position of the first bit
+
+For example, 0x101 indicates a 1-bit field at bit 1.
+This operation would be equivalent to
+
+  dest = (t1 & ~2) | ((t2 << 1) & 2)

Unless I've missed something, deposit_i32 is basically the same
as the ARM BFI instruction, so for ARM we could use deposit_i32 in
the implementation of BFI (and conversely implement deposit_i32
using BFI when we're generating for an ARMv6T2 or better host.)

-- PMM

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [Qemu-devel] Re: [PATCH 2/7] tcg-ppc: Implement deposit operation.
  2011-01-07 22:42 ` [Qemu-devel] [PATCH 2/7] tcg-ppc: Implement deposit operation Richard Henderson
@ 2011-01-07 23:33   ` malc
  0 siblings, 0 replies; 24+ messages in thread
From: malc @ 2011-01-07 23:33 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, Aurelien Jarno, Alexander Graf

On Fri, 7 Jan 2011, Richard Henderson wrote:

> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/ppc/tcg-target.c |   17 ++++++++++++++++-
>  tcg/ppc/tcg-target.h |    1 +
>  2 files changed, 17 insertions(+), 1 deletions(-)
> 

PPC bits look fine to me.

[..snip..]

-- 
mailto:av1474@comtv.ru

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] [PATCH 3/7] tcg-hppa: Implement deposit operation.
  2011-01-07 22:42 ` [Qemu-devel] [PATCH 3/7] tcg-hppa: " Richard Henderson
@ 2011-01-07 23:35   ` Stuart Brady
  2011-01-07 23:41     ` Richard Henderson
  0 siblings, 1 reply; 24+ messages in thread
From: Stuart Brady @ 2011-01-07 23:35 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, Aurelien Jarno, Alexander Graf

On Fri, Jan 07, 2011 at 02:42:59PM -0800, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/hppa/tcg-target.c |   58 +++++++++++++++++++++++++++++++++++++++++++-----
>  tcg/hppa/tcg-target.h |    1 +
>  2 files changed, 53 insertions(+), 6 deletions(-)
> 
> diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
> index 7f4653e..2c5df57 100644
> --- a/tcg/hppa/tcg-target.c
> +++ b/tcg/hppa/tcg-target.c
> @@ -467,6 +467,22 @@ static inline void tcg_out_dep(TCGContext *s, int ret, int arg,
>                | INSN_SHDEP_CP(31 - ofs) | INSN_DEP_LEN(len));
>  }
>  
> +static inline void tcg_out_depi(TCGContext *s, int ret, int arg,
                                                               ^^^
> +                                unsigned ofs, unsigned len)
> +{
> +    assert(ofs < 32 && len <= 32 - ofs);
> +    tcg_out32(s, INSN_DEPI | INSN_R2(ret) | INSN_IM5(val)
                                                        ^^^

The parameter should be named val, too.

[...]

>  static void tcg_out_shl(TCGContext *s, int ret, int arg, int creg)
> @@ -1407,6 +1420,38 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
>          }
>          break;
>  
> +    case INDEX_op_deposit_i32:
> +        {
> +            unsigned ofs = args[3] >> 8, len = args[3] & 0xff;
> +            int arg2 = args[2];
> +            int arg1 = args[1];
> +            int arg0 = args[0];
> +
> +            if (const_args[1]) {

Surely const_args[1] && arg1 == 0?

> +                if (const_args[2]) {
> +                    tcg_out_movi(s, TCG_TYPE_I32, arg0,
> +                                 (arg2 & ((1u << len) - 1)) << ofs);
> +                } else {
> +                    tcg_out_zdep(s, arg0, arg2, ofs, len);
> +                }

Otherwise, looks good at first glance.

It'll be a few days before I can test on an HPPA box, though.

Cheers,
-- 
Stuart Brady

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] [PATCH 0/7] Define "deposit" tcg operation
  2011-01-07 23:10 ` [Qemu-devel] [PATCH 0/7] Define "deposit" tcg operation Peter Maydell
@ 2011-01-07 23:37   ` Richard Henderson
  0 siblings, 0 replies; 24+ messages in thread
From: Richard Henderson @ 2011-01-07 23:37 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-devel, Aurelien Jarno, Alexander Graf

On 01/07/2011 03:10 PM, Peter Maydell wrote:
> Unless I've missed something, deposit_i32 is basically the same
> as the ARM BFI instruction, so for ARM we could use deposit_i32 in
> the implementation of BFI (and conversely implement deposit_i32
> using BFI when we're generating for an ARMv6T2 or better host.)

That's correct.

I'll admit to not knowing the arm/thumb variants very well.  While
I can see that gcc sometimes implements its similar "insv" operation
with BFC and BFI, knowing under what conditions those are available
is ... slightly complex to say the least.


r~

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] [PATCH 3/7] tcg-hppa: Implement deposit operation.
  2011-01-07 23:35   ` Stuart Brady
@ 2011-01-07 23:41     ` Richard Henderson
  0 siblings, 0 replies; 24+ messages in thread
From: Richard Henderson @ 2011-01-07 23:41 UTC (permalink / raw)
  To: Stuart Brady; +Cc: qemu-devel, Aurelien Jarno, Alexander Graf

On 01/07/2011 03:35 PM, Stuart Brady wrote:
>> +static inline void tcg_out_depi(TCGContext *s, int ret, int arg,
>                                                                ^^^
>> +                                unsigned ofs, unsigned len)
>> +{
>> +    assert(ofs < 32 && len <= 32 - ofs);
>> +    tcg_out32(s, INSN_DEPI | INSN_R2(ret) | INSN_IM5(val)
>                                                         ^^^
> 
> The parameter should be named val, too.

Oops.  It seems I failed to push that trivial change back from the gcc farm test box.

>> +            if (const_args[1]) {
> 
> Surely const_args[1] && arg1 == 0?

This is implied by the "Z" constraint used.


r~

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] [PATCH 1/7] tcg: Define "deposit" as an optional operation.
  2011-01-07 22:42 ` [Qemu-devel] [PATCH 1/7] tcg: Define "deposit" as an optional operation Richard Henderson
@ 2011-01-07 23:48   ` Stuart Brady
  2011-01-09 21:38   ` Aurelien Jarno
  1 sibling, 0 replies; 24+ messages in thread
From: Stuart Brady @ 2011-01-07 23:48 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, Aurelien Jarno, Alexander Graf

On Fri, Jan 07, 2011 at 02:42:57PM -0800, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/README    |   14 ++++++++++++++
>  tcg/tcg-op.h  |   40 ++++++++++++++++++++++++++++++++++++++++
>  tcg/tcg-opc.h |    6 ++++++
>  tcg/tcg.c     |   13 +++++++++++++
>  4 files changed, 73 insertions(+), 0 deletions(-)
> 
> diff --git a/tcg/README b/tcg/README
> index 68d27ff..ef59070 100644
> --- a/tcg/README
> +++ b/tcg/README
> @@ -285,6 +285,20 @@ the four high order bytes are set to zero.
>  Indicate that the value of t0 won't be used later. It is useful to
>  force dead code elimination.
>  
> +* deposit_i32/i64 dest, t1, t2, loc
> +
> +Deposit T2 as a bitfield into T1, placing the result in DEST.
> +The bitfield is described by LOC, an immediate value:
> +
> +  bits 0:7  - the length of the bitfield
> +  bits 8:15 - the position of the first bit
> +
> +For example, 0x101 indicates a 1-bit field at bit 1.
> +This operation would be equivalent to
> +
> +  dest = (t1 & ~2) | ((t2 << 1) & 2)

I'm being rather picky, but would something like this be better? :-

  For example, 0x804 indicates a 4-bit field starting from bit 8.
  This operation would be equivalent to:

    dest = (t1 & ~(0xf << 8)) | ((t2 << 8) & (0xf << 8))

OTOH, the code in your version was simpler... so maybe 0x201 or 0x102
as a compromise?

I suppose it's unlikely that anyone's really going to need the example
though, so I'm probably fussing too much. :-)

Cheers,
-- 
Stuart Brady

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] [PATCH 1/7] tcg: Define "deposit" as an optional operation.
  2011-01-07 22:42 ` [Qemu-devel] [PATCH 1/7] tcg: Define "deposit" as an optional operation Richard Henderson
  2011-01-07 23:48   ` Stuart Brady
@ 2011-01-09 21:38   ` Aurelien Jarno
  2011-01-09 22:45     ` Richard Henderson
  1 sibling, 1 reply; 24+ messages in thread
From: Aurelien Jarno @ 2011-01-09 21:38 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, Alexander Graf

On Fri, Jan 07, 2011 at 02:42:57PM -0800, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/README    |   14 ++++++++++++++
>  tcg/tcg-op.h  |   40 ++++++++++++++++++++++++++++++++++++++++
>  tcg/tcg-opc.h |    6 ++++++
>  tcg/tcg.c     |   13 +++++++++++++
>  4 files changed, 73 insertions(+), 0 deletions(-)
> 
> diff --git a/tcg/README b/tcg/README
> index 68d27ff..ef59070 100644
> --- a/tcg/README
> +++ b/tcg/README
> @@ -285,6 +285,20 @@ the four high order bytes are set to zero.
>  Indicate that the value of t0 won't be used later. It is useful to
>  force dead code elimination.
>  
> +* deposit_i32/i64 dest, t1, t2, loc
> +
> +Deposit T2 as a bitfield into T1, placing the result in DEST.
> +The bitfield is described by LOC, an immediate value:
> +
> +  bits 0:7  - the length of the bitfield
> +  bits 8:15 - the position of the first bit
> +
> +For example, 0x101 indicates a 1-bit field at bit 1.
> +This operation would be equivalent to
> +
> +  dest = (t1 & ~2) | ((t2 << 1) & 2)
> +
> +

The encoding of the constant part actually doesn't match the one in the
C function where two arguments are separated and not encoded. What about
adding a tcg_gen_op5ii_i32/64 function and having the two arguments
separated?

Also, what about a shorter name, for example 'dep', as it is used on
some architectures?

>  ********* Conditional moves
>  
>  * setcond_i32/i64 cond, dest, t1, t2
> diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
> index 3ee0a58..c5a019a 100644
> --- a/tcg/tcg-op.h
> +++ b/tcg/tcg-op.h
> @@ -2071,6 +2071,44 @@ static inline void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
>      }
>  }
>  
> +static inline void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1,
> +				       TCGv_i32 arg2, unsigned int ofs,
> +				       unsigned int len)
> +{
> +#ifdef TCG_TARGET_HAS_deposit_i32
> +  tcg_gen_op4i_i32(INDEX_op_deposit_i32, ret, arg1, arg2, (ofs << 8) | len);
> +#else
> +  uint32_t mask = (1u << len) - 1;
> +  TCGv_i32 t1 = tcg_temp_new_i32 ();
> +
> +  tcg_gen_andi_i32(t1, arg2, mask);
> +  tcg_gen_shli_i32(t1, t1, ofs);
> +  tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
> +  tcg_gen_or_i32(ret, ret, t1);
> +
> +  tcg_temp_free_i32(t1);
> +#endif
> +}
> +
> +static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1,
> +				       TCGv_i64 arg2, unsigned int ofs,
> +				       unsigned int len)
> +{
> +#ifdef TCG_TARGET_HAS_deposit_i64
> +  tcg_gen_op4i_i64(INDEX_op_deposit_i64, ret, arg1, arg2, (ofs << 8) | len);
> +#else
> +  uint64_t mask = (1ull << len) - 1;
> +  TCGv_i64 t1 = tcg_temp_new_i64 ();
> +
> +  tcg_gen_andi_i64(t1, arg2, mask);
> +  tcg_gen_shli_i64(t1, t1, ofs);
> +  tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
> +  tcg_gen_or_i64(ret, ret, t1);
> +
> +  tcg_temp_free_i64(t1);
> +#endif
> +}
> +
>  /***************************************/
>  /* QEMU specific operations. Their type depend on the QEMU CPU
>     type. */
> @@ -2384,6 +2422,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
>  #define tcg_gen_rotli_tl tcg_gen_rotli_i64
>  #define tcg_gen_rotr_tl tcg_gen_rotr_i64
>  #define tcg_gen_rotri_tl tcg_gen_rotri_i64
> +#define tcg_gen_deposit_tl tcg_gen_deposit_i64
>  #define tcg_const_tl tcg_const_i64
>  #define tcg_const_local_tl tcg_const_local_i64
>  #else
> @@ -2454,6 +2493,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
>  #define tcg_gen_rotli_tl tcg_gen_rotli_i32
>  #define tcg_gen_rotr_tl tcg_gen_rotr_i32
>  #define tcg_gen_rotri_tl tcg_gen_rotri_i32
> +#define tcg_gen_deposit_tl tcg_gen_deposit_i32
>  #define tcg_const_tl tcg_const_i32
>  #define tcg_const_local_tl tcg_const_local_i32
>  #endif
> diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
> index 2a98fed..ded6311 100644
> --- a/tcg/tcg-opc.h
> +++ b/tcg/tcg-opc.h
> @@ -78,6 +78,9 @@ DEF(sar_i32, 1, 2, 0, 0)
>  DEF(rotl_i32, 1, 2, 0, 0)
>  DEF(rotr_i32, 1, 2, 0, 0)
>  #endif
> +#ifdef TCG_TARGET_HAS_deposit_i32
> +DEF(deposit_i32, 1, 2, 1, 0)
> +#endif
>  
>  DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
>  #if TCG_TARGET_REG_BITS == 32
> @@ -168,6 +171,9 @@ DEF(sar_i64, 1, 2, 0, 0)
>  DEF(rotl_i64, 1, 2, 0, 0)
>  DEF(rotr_i64, 1, 2, 0, 0)
>  #endif
> +#ifdef TCG_TARGET_HAS_deposit_i64
> +DEF(deposit_i64, 1, 2, 1, 0)
> +#endif
>  
>  DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
>  #ifdef TCG_TARGET_HAS_ext8s_i64
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 5dd6a2c..e95a42f 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -956,6 +956,19 @@ void tcg_dump_ops(TCGContext *s, FILE *outfile)
>                      fprintf(outfile, ",$0x%" TCG_PRIlx, args[k++]);
>                  i = 1;
>                  break;
> +#if defined(TCG_TARGET_HAS_deposit_i32) || defined(TCG_TARGET_HAS_deposit_i64)
> +# ifdef TCG_TARGET_HAS_deposit_i32
> +            case INDEX_op_deposit_i32:
> +# endif
> +# ifdef TCG_TARGET_HAS_deposit_i64
> +            case INDEX_op_deposit_i64:
> +# endif
> +                fprintf(outfile, ",%u,%u", (unsigned)args[k] >> 8,
> +                        (unsigned)args[k] & 0xff);
> +                k++;
> +                i = 1;
> +                break;
> +#endif

Having two constants in the op would avoid this special code.

>              default:
>                  i = 0;
>                  break;
> -- 
> 1.7.2.3
> 
> 
> 

-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [Qemu-devel] Re: [PATCH 5/7] tcg-i386: Implement deposit operation.
  2011-01-07 22:43 ` [Qemu-devel] [PATCH 5/7] tcg-i386: " Richard Henderson
@ 2011-01-09 21:53   ` Aurelien Jarno
  2011-01-09 22:55     ` Richard Henderson
  0 siblings, 1 reply; 24+ messages in thread
From: Aurelien Jarno @ 2011-01-09 21:53 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, Alexander Graf

On Fri, Jan 07, 2011 at 02:43:01PM -0800, Richard Henderson wrote:
> Special case deposits that are implementable with byte and word stores.
> Otherwise implement with double-word shift plus rotates.
> 
> Expose tcg_reg_alloc to the backend for allocation of scratch registers.
> There's an edge condition that cannot actually happen at the moment due
> to a bug elsewhere in the register allocator, but it doesn't seem right
> to leave that unfixed.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/i386/tcg-target.c |   68 ++++++++++++++++++++++++++++++++++++++++++++++--
>  tcg/i386/tcg-target.h |    2 +
>  tcg/tcg.c             |    2 +
>  3 files changed, 69 insertions(+), 3 deletions(-)
> 
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index bb19a95..cc7d266 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -258,7 +258,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
>  #define OPC_JMP_long	(0xe9)
>  #define OPC_JMP_short	(0xeb)
>  #define OPC_LEA         (0x8d)
> -#define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
> +#define OPC_MOVB_EbGb	(0x88)		/* stores, more or less */
> +#define OPC_MOVB_GbEb   (0x8a)		/* loads, more or less */
>  #define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
>  #define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
>  #define OPC_MOVL_EvIz	(0xc7)
> @@ -277,6 +278,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
>  #define OPC_SHIFT_1	(0xd1)
>  #define OPC_SHIFT_Ib	(0xc1)
>  #define OPC_SHIFT_cl	(0xd3)
> +#define OPC_SHRD_Ib	(0xac | P_EXT)
>  #define OPC_TESTL	(0x85)
>  #define OPC_XCHG_ax_r32	(0x90)
>  
> @@ -710,6 +712,59 @@ static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
>      }
>  }
>  
> +static void tcg_out_deposit(TCGContext *s, int inout, int val,
> +                            unsigned ofs, unsigned len, int rexw)
> +{
> +    /* Look for MOVW special case.  */
> +    if (ofs == 0 && len == 16) {
> +        tcg_out_modrm(s, OPC_MOVL_GvEv + P_DATA16, inout, val);
> +        return;
> +    }
> +
> +    /* Look for MOVB w/ %reg_l special case.  */
> +    if (ofs == 0 && len == 8
> +        && (TCG_TARGET_REG_BITS == 64 || (inout < 4 && val < 4))) {
> +        tcg_out_modrm(s, OPC_MOVB_GbEb + P_REXB_R + P_REXB_RM, inout, val);
> +        return;
> +    }
> +
> +    /* Look for MOVB w/ %reg_h special case.  */
> +    if (ofs == 8 && len == 8 && inout < 4 && val < 4) {
> +        tcg_out_modrm(s, OPC_MOVB_GbEb, inout + 4, val);
> +        return;
> +    }
> +
> +    /* If we have a real deposit from self, we need a temporary.  */
> +    /* ??? There really ought to be a way to easily allocate a scratch.  */
> +    if (inout == val) {
> +        TCGType type = rexw ? TCG_TYPE_I64 : TCG_TYPE_I32;
> +        TCGRegSet inuse = s->reserved_regs;
> +
> +        tcg_regset_set_reg(inuse, inout);
> +        val = tcg_reg_alloc(s, tcg_target_available_regs[type], inuse);
> +
> +        tcg_out_mov(s, type, val, inout);

I am a bit worried by allocating a new register here, especially on the
i386 target, where the number of free registers is quite low, and often
0. We already had to tweak some code to avoid calls to tcg_abort() due
to missing registers.

> +    }
> +
> +    /* Arrange for the field to be at offset 0.  */
> +    if (ofs != 0) {
> +        tcg_out_shifti(s, SHIFT_ROR + rexw, inout, ofs);
> +    }
> +
> +    /* Shift the value into the top of the word.  This shifts the old
> +       field out of the bottom of the word and leaves us with the whole
> +       word rotated right by the size of the field.  */
> +    tcg_out_modrm(s, OPC_SHRD_Ib + rexw, val, inout);
> +    tcg_out8(s, len);
> +
> +    /* Restore the field to its proper location.  */
> +    ofs = (len + ofs) & (rexw ? 63 : 31);
> +    if (ofs != 0) {
> +        tcg_out_shifti(s, SHIFT_ROL + rexw, inout, ofs);
> +    }
> +}
> +
> +
>  /* Use SMALL != 0 to force a short forward branch.  */
>  static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
>  {
> @@ -1266,7 +1321,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
>  
>      switch (sizeop) {
>      case 0:
> -        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
> +        tcg_out_modrm_offset(s, OPC_MOVB_EbGb + P_REXB_R, datalo, base, ofs);
>          break;
>      case 1:
>          if (bswap) {
> @@ -1504,7 +1559,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          break;
>  
>      OP_32_64(st8):
> -        tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
> +        tcg_out_modrm_offset(s, OPC_MOVB_EbGb | P_REXB_R,
>                               args[0], args[1], args[2]);
>          break;
>      OP_32_64(st16):
> @@ -1603,6 +1658,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          }
>          break;
>  
> +    OP_32_64(deposit):
> +        tcg_out_deposit(s, args[0], args[2],
> +                        args[3] >> 8, args[3] & 0xff, rexw);
> +        break;
> +
>      case INDEX_op_brcond_i32:
>          tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
>                           args[3], 0);
> @@ -1783,6 +1843,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
>      { INDEX_op_sar_i32, { "r", "0", "ci" } },
>      { INDEX_op_rotl_i32, { "r", "0", "ci" } },
>      { INDEX_op_rotr_i32, { "r", "0", "ci" } },
> +    { INDEX_op_deposit_i32, { "r", "0", "r" } },
>  
>      { INDEX_op_brcond_i32, { "r", "ri" } },
>  
> @@ -1835,6 +1896,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
>      { INDEX_op_sar_i64, { "r", "0", "ci" } },
>      { INDEX_op_rotl_i64, { "r", "0", "ci" } },
>      { INDEX_op_rotr_i64, { "r", "0", "ci" } },
> +    { INDEX_op_deposit_i64, { "r", "0", "r" } },
>  
>      { INDEX_op_brcond_i64, { "r", "re" } },
>      { INDEX_op_setcond_i64, { "r", "r", "re" } },
> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
> index bfafbfc..9f90d17 100644
> --- a/tcg/i386/tcg-target.h
> +++ b/tcg/i386/tcg-target.h
> @@ -77,6 +77,7 @@ enum {
>  /* optional instructions */
>  #define TCG_TARGET_HAS_div2_i32
>  #define TCG_TARGET_HAS_rot_i32
> +#define TCG_TARGET_HAS_deposit_i32
>  #define TCG_TARGET_HAS_ext8s_i32
>  #define TCG_TARGET_HAS_ext16s_i32
>  #define TCG_TARGET_HAS_ext8u_i32
> @@ -94,6 +95,7 @@ enum {
>  #if TCG_TARGET_REG_BITS == 64
>  #define TCG_TARGET_HAS_div2_i64
>  #define TCG_TARGET_HAS_rot_i64
> +#define TCG_TARGET_HAS_deposit_i64
>  #define TCG_TARGET_HAS_ext8s_i64
>  #define TCG_TARGET_HAS_ext16s_i64
>  #define TCG_TARGET_HAS_ext32s_i64
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index e95a42f..5ab9122 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -156,6 +156,8 @@ int gen_new_label(void)
>      return idx;
>  }
>  
> +static int tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2);
> +
>  #include "tcg-target.c"
>  
>  /* pool based memory allocation */
> -- 
> 1.7.2.3
> 
> 

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [Qemu-devel] Re: [PATCH 4/7] tcg-ia64: Implement deposit operation.
  2011-01-07 22:43 ` [Qemu-devel] [PATCH 4/7] tcg-ia64: " Richard Henderson
@ 2011-01-09 22:04   ` Aurelien Jarno
  0 siblings, 0 replies; 24+ messages in thread
From: Aurelien Jarno @ 2011-01-09 22:04 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, Alexander Graf

On Fri, Jan 07, 2011 at 02:43:00PM -0800, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/ia64/tcg-target.c |   92 +++++++++++++++++++++++++++++++++++++++++++++++++
>  tcg/ia64/tcg-target.h |    2 +
>  2 files changed, 94 insertions(+), 0 deletions(-)
> 
> diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
> index 3ddf434..2708d55 100644
> --- a/tcg/ia64/tcg-target.c
> +++ b/tcg/ia64/tcg-target.c
> @@ -237,6 +237,7 @@ enum {
>      OPC_CMP4_LT_A6            = 0x18400000000ull,
>      OPC_CMP4_LTU_A6           = 0x1a400000000ull,
>      OPC_CMP4_EQ_A6            = 0x1c400000000ull,
> +    OPC_DEP_I15               = 0x08000000000ull,
>      OPC_DEP_Z_I12             = 0x0a600000000ull,
>      OPC_EXTR_I11              = 0x0a400002000ull,
>      OPC_EXTR_U_I11            = 0x0a400000000ull,
> @@ -508,6 +509,19 @@ static inline uint64_t tcg_opc_i12(int qp, uint64_t opc, int r1,
>             | (qp & 0x3f);
>  }
>  
> +static inline uint64_t tcg_opc_i15(int qp, uint64_t opc, int r1,
> +                                   int r2, int r3, int len, uint64_t len,
> +                                   uint64_t cpos)
> +{
> +    return opc
> +           | ((cpos & 0x3f) << 31)
> +           | ((len & 0x0f) << 27)
> +           | ((r3 & 0x7f) << 20)
> +           | ((r2 & 0x7f) << 13)
> +           | ((r1 & 0x7f) << 6)
> +           | (qp & 0x3f);
> +}
> +
>  static inline uint64_t tcg_opc_i18(int qp, uint64_t opc, uint64_t imm)
>  {
>      return opc
> @@ -1335,6 +1349,73 @@ static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg)
>                     tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, arg, 0xb));
>  }
>  
> +static void tcg_out_deposit_i32(TCGContext *s, TCGArg out, TCGArg in,
> +                                TCGArg val, unsigned ofs, unsigned len)
> +{
> +    uint64_t nop_m = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
> +
> +    if (val == 0) {
> +        tcg_out_bundle(s, mmI, nop_m, nop_m,
> +                       tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, out, in,
> +                                   len - 1, 63 - ofs));
> +    } else if (len <= 16) {
> +        tcg_out_bundle(s, mmI, nop_m, nop_m,
> +                       tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, out, in, val,
> +                                   len - 1, 63 - ofs));
> +    } else {
> +        tcg_out_bundle(s, miI, nop_m,
> +                       tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2,
> +                                   val, 16, 31 - 16),
> +                       tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, out, in, val,
> +                                   16 - 1, 63 - ofs));
> +        tcg_out_bundle(s, mmI, nop_m, nop_m,
> +                       tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, out, out,
> +                                   TCG_REG_R2, len - 16 - 1, 63 - (ofs + 16)));
> +    }
> +}
> +
> +static void tcg_out_deposit_i64(TCGContext *s, TCGArg out, TCGArg in,
> +                                TCGArg val, unsigned ofs, unsigned len)
> +{
> +    uint64_t nop_m = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
> +
> +    if (val == 0) {
> +        tcg_out_bundle(s, mmI, nop_m, nop_m,
> +                       tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, out, in,
> +                                   len - 1, 63 - ofs));
> +    } else if (len <= 16) {
> +        tcg_out_bundle(s, mmI, nop_m, nop_m,
> +                       tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, out, in, val,
> +                                   len - 1, 63 - ofs));
> +    } else {
> +        uint64_t ror = 0, shrp, rol = 0;
> +
> +        if (ofs) {
> +	    ror = tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, TCG_REG_R2,
> +                              in, in, ofs);
> +            in = TCG_REG_R2;
> +        }
> +
> +        shrp = tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, out, in, val, len);
> +
> +        ofs = (ofs - len) & 63;
> +        if (ofs) {
> +            rol = tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, out, out, out, 64-ofs);
> +        }
> +
> +        if (ror) {
> +            tcg_out_bundle(s, mII, nop_m, ror, shrp);
> +            if (rol) {
> +                tcg_out_bundle(s, mmI, nop_m, nop_m, rol);
> +            }
> +        } else if (rol) {
> +            tcg_out_bundle(s, mII, nop_m, shrp, rol);
> +        } else {
> +            tcg_out_bundle(s, mmI, nop_m, nop_m, shrp);
> +        }
> +    }
> +}
> +
>  static inline uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1,
>                                       TCGArg arg2, int cmp4)
>  {
> @@ -2063,6 +2144,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          tcg_out_rotr_i64(s, args[0], args[1], args[2], const_args[2]);
>          break;
>  
> +    case INDEX_op_deposit_i32:
> +        tcg_out_deposit_i32(s, args[0], args[1], args[2],
> +                            (args[3] >> 8) & 31, args[3] & 31);
> +        break;
> +    case INDEX_op_deposit_i64:
> +        tcg_out_deposit_i64(s, args[0], args[1], args[2],
> +                            (args[3] >> 8) & 63, args[3] & 63);
> +        break;
> +
>      case INDEX_op_ext8s_i32:
>      case INDEX_op_ext8s_i64:
>          tcg_out_ext(s, OPC_SXT1_I29, args[0], args[1]);
> @@ -2192,6 +2282,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
>      { INDEX_op_shr_i32, { "r", "rZ", "ri" } },
>      { INDEX_op_rotl_i32, { "r", "rZ", "ri" } },
>      { INDEX_op_rotr_i32, { "r", "rZ", "ri" } },
> +    { INDEX_op_deposit_i32, { "r", "rZ", "rZ" } },
>  
>      { INDEX_op_ext8s_i32, { "r", "rZ"} },
>      { INDEX_op_ext8u_i32, { "r", "rZ"} },
> @@ -2238,6 +2329,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
>      { INDEX_op_shr_i64, { "r", "rZ", "ri" } },
>      { INDEX_op_rotl_i64, { "r", "rZ", "ri" } },
>      { INDEX_op_rotr_i64, { "r", "rZ", "ri" } },
> +    { INDEX_op_deposit_i64, { "r", "rZ", "rZ" } },
>  
>      { INDEX_op_ext8s_i64, { "r", "rZ"} },
>      { INDEX_op_ext8u_i64, { "r", "rZ"} },
> diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
> index e56e88f..80e3534 100644
> --- a/tcg/ia64/tcg-target.h
> +++ b/tcg/ia64/tcg-target.h
> @@ -131,6 +131,8 @@ enum {
>  #define TCG_TARGET_HAS_orc_i64
>  #define TCG_TARGET_HAS_rot_i32
>  #define TCG_TARGET_HAS_rot_i64
> +#define TCG_TARGET_HAS_deposit_i32
> +#define TCG_TARGET_HAS_deposit_i64
>  
>  /* optional instructions automatically implemented */
>  #undef TCG_TARGET_HAS_neg_i32   /* sub r1, r0, r3 */

I haven't tested it yet, but it looks ok.


-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] [PATCH 1/7] tcg: Define "deposit" as an optional operation.
  2011-01-09 21:38   ` Aurelien Jarno
@ 2011-01-09 22:45     ` Richard Henderson
  0 siblings, 0 replies; 24+ messages in thread
From: Richard Henderson @ 2011-01-09 22:45 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel, Alexander Graf

On 01/09/2011 01:38 PM, Aurelien Jarno wrote:
> The encoding of the constant part actually doesn't match the one in the
> C function where two arguments are separated and not encoded. What about
> adding a tcg_gen_op5ii_i32/64 function and having the two arguments
> separated?

I certainly didn't want to expose the combined argument to the target
translators.  I had thought that saving some memory by combining the
two constants would be a good idea, but perhaps you're right that it's
overly complicated for the benefit.

> Also what about a shorter name, like for example 'dep' as it is used on
> some architectures.

I suppose.  I thought perhaps that would be too cryptic though.


r~

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] Re: [PATCH 5/7] tcg-i386: Implement deposit operation.
  2011-01-09 21:53   ` [Qemu-devel] " Aurelien Jarno
@ 2011-01-09 22:55     ` Richard Henderson
  2011-01-10  0:16       ` Aurelien Jarno
  2011-01-10 18:37       ` Aurelien Jarno
  0 siblings, 2 replies; 24+ messages in thread
From: Richard Henderson @ 2011-01-09 22:55 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel, Alexander Graf

On 01/09/2011 01:53 PM, Aurelien Jarno wrote:
>> +    if (inout == val) {
>> +        TCGType type = rexw ? TCG_TYPE_I64 : TCG_TYPE_I32;
>> +        TCGRegSet inuse = s->reserved_regs;
>> +
>> +        tcg_regset_set_reg(inuse, inout);
>> +        val = tcg_reg_alloc(s, tcg_target_available_regs[type], inuse);
>> +
>> +        tcg_out_mov(s, type, val, inout);
> 
> I am a bit worried by allocating a new register here, especially on the
> i386 target, where the number of free registers is quite low, and often
> 0. We already had to tweak some code to avoid calls to tcg_abort() due
> to missing registers.

Well, as I said, this case can't actually trigger due to a bug in the
register allocator.  This can be seen in an insn like

	mov	%dl,%dh

where you would expect to see

	deposit	x,x,x,8,8

however, the matching constraint forces the destination and the matching
source into a new register:

                /* if the input is aliased to an output and if it is
                   not dead after the instruction, we must allocate
                   a new register and move it */
                if (!IS_DEAD_IARG(i - nb_oargs)) 
                    goto allocate_in_reg;

which means that we'll always see

	mov	y,x
	deposit y,y,x,8,8

So I could simply put a tcg_abort there.  It would be up to whoever
improves the register allocator to provide some mechanism for a
backend to allocate a scratch.  What do you think?


r~

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] Re: [PATCH 5/7] tcg-i386: Implement deposit operation.
  2011-01-09 22:55     ` Richard Henderson
@ 2011-01-10  0:16       ` Aurelien Jarno
  2011-01-10  0:43         ` Richard Henderson
  2011-01-10 18:37       ` Aurelien Jarno
  1 sibling, 1 reply; 24+ messages in thread
From: Aurelien Jarno @ 2011-01-10  0:16 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, Alexander Graf

On Sun, Jan 09, 2011 at 02:55:13PM -0800, Richard Henderson wrote:
> On 01/09/2011 01:53 PM, Aurelien Jarno wrote:
> >> +    if (inout == val) {
> >> +        TCGType type = rexw ? TCG_TYPE_I64 : TCG_TYPE_I32;
> >> +        TCGRegSet inuse = s->reserved_regs;
> >> +
> >> +        tcg_regset_set_reg(inuse, inout);
> >> +        val = tcg_reg_alloc(s, tcg_target_available_regs[type], inuse);
> >> +
> >> +        tcg_out_mov(s, type, val, inout);
> > 
> > I am a bit worried by allocating a new register here, especially on the
> > i386 target, where the number of free registers is quite low, and often
> > 0. We already had to tweak some code to avoid calls to tcg_abort() due
> > to missing registers.
> 
> Well, as I said, this case can't actually trigger due to a bug in the
> register allocator.  This can be seen in an insn like
> 
> 	mov	%dl,%dh
> 
> where you would expect to see
> 
> 	deposit	x,x,x,8,8
> 
> however, the matching constraint forces the destination and the matching
> source into a new register:
> 
>                 /* if the input is aliased to an output and if it is
>                    not dead after the instruction, we must allocate
>                    a new register and move it */
>                 if (!IS_DEAD_IARG(i - nb_oargs)) 
>                     goto allocate_in_reg;

I guess we should also add a case when the input argument and the output
argument are the same.

> which means that we'll always see
> 
> 	mov	y,x
> 	deposit y,y,x,8,8
> 
> So I could simply put a tcg_abort there.  It would be up to whoever
> improves the register allocator to provide some mechanism for a
> backend to allocate a scratch.  What do you think?
> 

The code being written now or later doesn't change the question of
whether it is always possible to allocate one scratch register here on i386.

Up to now we didn't provide any function for allocating a scratch
register, as on CISC CPUs the op directly maps to an instruction, whereas
on RISC CPUs, where the op has to be emulated with multiple instructions,
there are enough registers available to define one or more permanent
scratch registers.

-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] Re: [PATCH 5/7] tcg-i386: Implement deposit operation.
  2011-01-10  0:16       ` Aurelien Jarno
@ 2011-01-10  0:43         ` Richard Henderson
  2011-01-10 16:52           ` Aurelien Jarno
  0 siblings, 1 reply; 24+ messages in thread
From: Richard Henderson @ 2011-01-10  0:43 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel, Alexander Graf

On 01/09/2011 04:16 PM, Aurelien Jarno wrote:
> The code being written now or later doesn't change the question of
> whether it is always possible to allocate one scratch register here on i386.

Yes.

Here there's only one register that needs to remain live.  In the 
worst case, we'll spill one live register.  Which we can always do
via a simple store via AREG0 + offset.


r~

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] Re: [PATCH 5/7] tcg-i386: Implement deposit operation.
  2011-01-10  0:43         ` Richard Henderson
@ 2011-01-10 16:52           ` Aurelien Jarno
  0 siblings, 0 replies; 24+ messages in thread
From: Aurelien Jarno @ 2011-01-10 16:52 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, Alexander Graf

On Sun, Jan 09, 2011 at 04:43:22PM -0800, Richard Henderson wrote:
> On 01/09/2011 04:16 PM, Aurelien Jarno wrote:
> > The code being written now or later doesn't change the question of
> > whether it is always possible to allocate one scratch register here on i386.
> 
> Yes.
> 
> Here there's only one register that needs to remain live.  In the 
> worst case, we'll spill one live register.  Which we can always do
> via a simple store via AREG0 + offset.

Oh right, correct: we are inside the instruction, and in a case where we
have very few arguments, and only 32-bit.

If we allow the target to allocate scratch registers themselves, it's
probably better to do that via a function.

-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] Re: [PATCH 5/7] tcg-i386: Implement deposit operation.
  2011-01-09 22:55     ` Richard Henderson
  2011-01-10  0:16       ` Aurelien Jarno
@ 2011-01-10 18:37       ` Aurelien Jarno
  2011-01-10 19:19         ` Richard Henderson
  1 sibling, 1 reply; 24+ messages in thread
From: Aurelien Jarno @ 2011-01-10 18:37 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, Alexander Graf

On Sun, Jan 09, 2011 at 02:55:13PM -0800, Richard Henderson wrote:
> On 01/09/2011 01:53 PM, Aurelien Jarno wrote:
> >> +    if (inout == val) {
> >> +        TCGType type = rexw ? TCG_TYPE_I64 : TCG_TYPE_I32;
> >> +        TCGRegSet inuse = s->reserved_regs;
> >> +
> >> +        tcg_regset_set_reg(inuse, inout);
> >> +        val = tcg_reg_alloc(s, tcg_target_available_regs[type], inuse);
> >> +
> >> +        tcg_out_mov(s, type, val, inout);
> > 
> > I am a bit worried by allocating a new register here, especially on the
> > i386 target, where the number of free registers is quite low, and often
> > 0. We already had to tweak some code to avoid calls to tcg_abort() due
> > to missing registers.
> 
> Well, as I said, this case can't actually trigger due to a bug in the
> register allocator.  This can be seen in an insn like
> 
> 	mov	%dl,%dh
> 
> where you would expect to see
> 
> 	deposit	x,x,x,8,8
> 
> however, the matching constraint forces the destination and the matching
> source into a new register:
> 
>                 /* if the input is aliased to an output and if it is
>                    not dead after the instruction, we must allocate
>                    a new register and move it */
>                 if (!IS_DEAD_IARG(i - nb_oargs)) 
>                     goto allocate_in_reg;

I have not been able to trigger this code path with a deposit
instruction.

> which means that we'll always see
> 
> 	mov	y,x
> 	deposit y,y,x,8,8
> 
> So I could simply put a tcg_abort there.  It would be up to whoever
> improves the register allocator to provide some mechanism for a
> backend to allocate a scratch.  What do you think?
> 

Do you have a way to trigger this problem? or a dump of the ops and asm
output?

-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] Re: [PATCH 5/7] tcg-i386: Implement deposit operation.
  2011-01-10 18:37       ` Aurelien Jarno
@ 2011-01-10 19:19         ` Richard Henderson
  0 siblings, 0 replies; 24+ messages in thread
From: Richard Henderson @ 2011-01-10 19:19 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel, Alexander Graf

On 01/10/2011 10:37 AM, Aurelien Jarno wrote:
>> 	mov	y,x
>> 	deposit y,y,x,8,8
>>
>> So I could simply put a tcg_abort there.  It would be up to whoever
>> improves the register allocator to provide some mechanism for a
>> backend to allocate a scratch.  What do you think?
>>
> 
> Do you have a way to trigger this problem? or a dump of the ops and asm
> output?

IN: 
0x408120c4:  rlwimi  r4,r4,8,16,23

OP:
 ---- 0x408120c4
 deposit_i32 r4,r4,r4,8,8
 goto_tb $0x0
 movi_i32 nip,$0x408120c8
 exit_tb $0x7fbb00ca5758

OUT: [size=52]
0x60294380:  mov    0x10(%r14),%ebp
0x60294384:  mov    %ebp,%ebx
0x60294386:  ror    $0x8,%ebp
0x60294389:  shrd   $0x8,%ebx,%ebp
0x6029438d:  rol    $0x10,%ebp
0x60294390:  mov    %ebp,0x10(%r14)
0x60294394:  jmpq   0x60294399
0x60294399:  mov    $0x408120c8,%ebp
0x6029439e:  mov    %ebp,0x25c(%r14)
0x602943a5:  mov    $0x7fbb00ca5758,%rax
0x602943af:  jmpq   0x622b772e

That should do it.  This is present in linux-user-test ppc/ls.
This output still contains that allocate-a-scratch path.


r~

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2011-01-10 20:29 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-01-07 22:42 [Qemu-devel] [PATCH 0/7] Define "deposit" tcg operation Richard Henderson
2011-01-07 22:42 ` [Qemu-devel] [PATCH 1/7] tcg: Define "deposit" as an optional operation Richard Henderson
2011-01-07 23:48   ` Stuart Brady
2011-01-09 21:38   ` Aurelien Jarno
2011-01-09 22:45     ` Richard Henderson
2011-01-07 22:42 ` [Qemu-devel] [PATCH 2/7] tcg-ppc: Implement deposit operation Richard Henderson
2011-01-07 23:33   ` [Qemu-devel] " malc
2011-01-07 22:42 ` [Qemu-devel] [PATCH 3/7] tcg-hppa: " Richard Henderson
2011-01-07 23:35   ` Stuart Brady
2011-01-07 23:41     ` Richard Henderson
2011-01-07 22:43 ` [Qemu-devel] [PATCH 4/7] tcg-ia64: " Richard Henderson
2011-01-09 22:04   ` [Qemu-devel] " Aurelien Jarno
2011-01-07 22:43 ` [Qemu-devel] [PATCH 5/7] tcg-i386: " Richard Henderson
2011-01-09 21:53   ` [Qemu-devel] " Aurelien Jarno
2011-01-09 22:55     ` Richard Henderson
2011-01-10  0:16       ` Aurelien Jarno
2011-01-10  0:43         ` Richard Henderson
2011-01-10 16:52           ` Aurelien Jarno
2011-01-10 18:37       ` Aurelien Jarno
2011-01-10 19:19         ` Richard Henderson
2011-01-07 22:43 ` [Qemu-devel] [PATCH 6/7] target-i386: Use " Richard Henderson
2011-01-07 22:43 ` [Qemu-devel] [PATCH 7/7] target-ppc: " Richard Henderson
2011-01-07 23:10 ` [Qemu-devel] [PATCH 0/7] Define "deposit" tcg operation Peter Maydell
2011-01-07 23:37   ` Richard Henderson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.