All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg
@ 2018-09-11 20:28 Emilio G. Cota
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 01/13] target/i386: move cpu_cc_srcT to DisasContext Emilio G. Cota
                   ` (13 more replies)
  0 siblings, 14 replies; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

v2: https://lists.gnu.org/archive/html/qemu-devel/2018-09/msg01122.html

Changes since v2:

- Add Richard Henderson's (rth) Reviewed-by tag to the last patch
- Drop v2's first 10 patches, since Paolo already picked those up
- Move TCG temps + x86_64_hregs to DisasContext
  + While at it, drop the cpu_ prefix from the TCG temps,
    e.g. cpu_A0 -> s->A0
  + Split the conversion into separate patches to ease review.
    The patches are quite boring and long because the temps
    are used everywhere, and I had to add a DisasContext *s parameter
    to quite a few functions.

The series is checkpatch-clean.

You can fetch these patches from:
  https://github.com/cota/qemu/tree/i386-mttcg-v3

Thanks,

		Emilio

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 01/13] target/i386: move cpu_cc_srcT to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:44   ` Richard Henderson
  2018-09-13 14:21   ` Alex Bennée
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 02/13] target/i386: move cpu_A0 " Emilio G. Cota
                   ` (12 subsequent siblings)
  13 siblings, 2 replies; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 1f9d1d9b24..e9f512472e 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -73,7 +73,7 @@
 
 /* global register indexes */
 static TCGv cpu_A0;
-static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
+static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
 static TCGv_i32 cpu_cc_op;
 static TCGv cpu_regs[CPU_NB_REGS];
 static TCGv cpu_seg_base[6];
@@ -135,6 +135,10 @@ typedef struct DisasContext {
     int cpuid_ext3_features;
     int cpuid_7_0_ebx_features;
     int cpuid_xsave_features;
+
+    /* TCG local temps */
+    TCGv cc_srcT;
+
     sigjmp_buf jmpbuf;
 } DisasContext;
 
@@ -244,7 +248,7 @@ static void set_cc_op(DisasContext *s, CCOp op)
         tcg_gen_discard_tl(cpu_cc_src2);
     }
     if (dead & USES_CC_SRCT) {
-        tcg_gen_discard_tl(cpu_cc_srcT);
+        tcg_gen_discard_tl(s->cc_srcT);
     }
 
     if (op == CC_OP_DYNAMIC) {
@@ -667,11 +671,11 @@ static inline void gen_op_testl_T0_T1_cc(void)
     tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
 }
 
-static void gen_op_update_neg_cc(void)
+static void gen_op_update_neg_cc(DisasContext *s)
 {
     tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
     tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
-    tcg_gen_movi_tl(cpu_cc_srcT, 0);
+    tcg_gen_movi_tl(s->cc_srcT, 0);
 }
 
 /* compute all eflags to cc_src */
@@ -742,7 +746,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
         t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
         /* If no temporary was used, be careful not to alias t1 and t0.  */
         t0 = t1 == cpu_cc_src ? cpu_tmp0 : reg;
-        tcg_gen_mov_tl(t0, cpu_cc_srcT);
+        tcg_gen_mov_tl(t0, s->cc_srcT);
         gen_extu(size, t0);
         goto add_sub;
 
@@ -899,7 +903,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         size = s->cc_op - CC_OP_SUBB;
         switch (jcc_op) {
         case JCC_BE:
-            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
+            tcg_gen_mov_tl(cpu_tmp4, s->cc_srcT);
             gen_extu(size, cpu_tmp4);
             t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
             cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
@@ -912,7 +916,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         case JCC_LE:
             cond = TCG_COND_LE;
         fast_jcc_l:
-            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
+            tcg_gen_mov_tl(cpu_tmp4, s->cc_srcT);
             gen_exts(size, cpu_tmp4);
             t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
             cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
@@ -1309,11 +1313,11 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
     case OP_SUBL:
         if (s1->prefix & PREFIX_LOCK) {
             tcg_gen_neg_tl(cpu_T0, cpu_T1);
-            tcg_gen_atomic_fetch_add_tl(cpu_cc_srcT, cpu_A0, cpu_T0,
+            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, cpu_A0, cpu_T0,
                                         s1->mem_index, ot | MO_LE);
-            tcg_gen_sub_tl(cpu_T0, cpu_cc_srcT, cpu_T1);
+            tcg_gen_sub_tl(cpu_T0, s1->cc_srcT, cpu_T1);
         } else {
-            tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
+            tcg_gen_mov_tl(s1->cc_srcT, cpu_T0);
             tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
@@ -1356,7 +1360,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
         break;
     case OP_CMPL:
         tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
-        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
+        tcg_gen_mov_tl(s1->cc_srcT, cpu_T0);
         tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
         set_cc_op(s1, CC_OP_SUBB + ot);
         break;
@@ -4823,7 +4827,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     gen_op_mov_reg_v(ot, rm, cpu_T0);
                 }
             }
-            gen_op_update_neg_cc();
+            gen_op_update_neg_cc(s);
             set_cc_op(s, CC_OP_SUBB + ot);
             break;
         case 4: /* mul */
@@ -5283,7 +5287,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 }
             }
             tcg_gen_mov_tl(cpu_cc_src, oldv);
-            tcg_gen_mov_tl(cpu_cc_srcT, cmpv);
+            tcg_gen_mov_tl(s->cc_srcT, cmpv);
             tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
             set_cc_op(s, CC_OP_SUBB + ot);
             tcg_temp_free(oldv);
@@ -8463,7 +8467,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
     cpu_tmp4 = tcg_temp_new();
     cpu_ptr0 = tcg_temp_new_ptr();
     cpu_ptr1 = tcg_temp_new_ptr();
-    cpu_cc_srcT = tcg_temp_local_new();
+    dc->cc_srcT = tcg_temp_local_new();
 }
 
 static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 02/13] target/i386: move cpu_A0 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 01/13] target/i386: move cpu_cc_srcT to DisasContext Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:45   ` Richard Henderson
  2018-09-13 14:23   ` Alex Bennée
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 03/13] target/i386: move cpu_T0 " Emilio G. Cota
                   ` (11 subsequent siblings)
  13 siblings, 2 replies; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 472 ++++++++++++++++++++--------------------
 1 file changed, 236 insertions(+), 236 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index e9f512472e..c6b1baab9d 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -72,7 +72,6 @@
 //#define MACRO_TEST   1
 
 /* global register indexes */
-static TCGv cpu_A0;
 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
 static TCGv_i32 cpu_cc_op;
 static TCGv cpu_regs[CPU_NB_REGS];
@@ -138,6 +137,7 @@ typedef struct DisasContext {
 
     /* TCG local temps */
     TCGv cc_srcT;
+    TCGv A0;
 
     sigjmp_buf jmpbuf;
 } DisasContext;
@@ -395,9 +395,9 @@ static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
 
 static void gen_add_A0_im(DisasContext *s, int val)
 {
-    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
+    tcg_gen_addi_tl(s->A0, s->A0, val);
     if (!CODE64(s)) {
-        tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
+        tcg_gen_ext32u_tl(s->A0, s->A0);
     }
 }
 
@@ -431,7 +431,7 @@ static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
 static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
 {
     if (d == OR_TMP0) {
-        gen_op_st_v(s, idx, cpu_T0, cpu_A0);
+        gen_op_st_v(s, idx, cpu_T0, s->A0);
     } else {
         gen_op_mov_reg_v(idx, d, cpu_T0);
     }
@@ -453,7 +453,7 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
 #ifdef TARGET_X86_64
     case MO_64:
         if (ovr_seg < 0) {
-            tcg_gen_mov_tl(cpu_A0, a0);
+            tcg_gen_mov_tl(s->A0, a0);
             return;
         }
         break;
@@ -464,14 +464,14 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
             ovr_seg = def_seg;
         }
         if (ovr_seg < 0) {
-            tcg_gen_ext32u_tl(cpu_A0, a0);
+            tcg_gen_ext32u_tl(s->A0, a0);
             return;
         }
         break;
     case MO_16:
         /* 16 bit address */
-        tcg_gen_ext16u_tl(cpu_A0, a0);
-        a0 = cpu_A0;
+        tcg_gen_ext16u_tl(s->A0, a0);
+        a0 = s->A0;
         if (ovr_seg < 0) {
             if (s->addseg) {
                 ovr_seg = def_seg;
@@ -488,13 +488,13 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
         TCGv seg = cpu_seg_base[ovr_seg];
 
         if (aflag == MO_64) {
-            tcg_gen_add_tl(cpu_A0, a0, seg);
+            tcg_gen_add_tl(s->A0, a0, seg);
         } else if (CODE64(s)) {
-            tcg_gen_ext32u_tl(cpu_A0, a0);
-            tcg_gen_add_tl(cpu_A0, cpu_A0, seg);
+            tcg_gen_ext32u_tl(s->A0, a0);
+            tcg_gen_add_tl(s->A0, s->A0, seg);
         } else {
-            tcg_gen_add_tl(cpu_A0, a0, seg);
-            tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
+            tcg_gen_add_tl(s->A0, a0, seg);
+            tcg_gen_ext32u_tl(s->A0, s->A0);
         }
     }
 }
@@ -640,9 +640,9 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
 static inline void gen_movs(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+    gen_op_ld_v(s, ot, cpu_T0, s->A0);
     gen_string_movl_A0_EDI(s);
-    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+    gen_op_st_v(s, ot, cpu_T0, s->A0);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_ESI);
     gen_op_add_reg_T0(s->aflag, R_EDI);
@@ -1072,7 +1072,7 @@ static inline void gen_stos(DisasContext *s, TCGMemOp ot)
 {
     gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
     gen_string_movl_A0_EDI(s);
-    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+    gen_op_st_v(s, ot, cpu_T0, s->A0);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
 }
@@ -1080,7 +1080,7 @@ static inline void gen_stos(DisasContext *s, TCGMemOp ot)
 static inline void gen_lods(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+    gen_op_ld_v(s, ot, cpu_T0, s->A0);
     gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_ESI);
@@ -1089,7 +1089,7 @@ static inline void gen_lods(DisasContext *s, TCGMemOp ot)
 static inline void gen_scas(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_EDI(s);
-    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+    gen_op_ld_v(s, ot, cpu_T1, s->A0);
     gen_op(s, OP_CMPL, ot, R_EAX);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
@@ -1098,7 +1098,7 @@ static inline void gen_scas(DisasContext *s, TCGMemOp ot)
 static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_EDI(s);
-    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+    gen_op_ld_v(s, ot, cpu_T1, s->A0);
     gen_string_movl_A0_ESI(s);
     gen_op(s, OP_CMPL, ot, OR_TMP0);
     gen_op_movl_T0_Dshift(ot);
@@ -1128,11 +1128,11 @@ static inline void gen_ins(DisasContext *s, TCGMemOp ot)
     /* Note: we must do this dummy write first to be restartable in
        case of page fault. */
     tcg_gen_movi_tl(cpu_T0, 0);
-    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+    gen_op_st_v(s, ot, cpu_T0, s->A0);
     tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
     tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
     gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
-    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+    gen_op_st_v(s, ot, cpu_T0, s->A0);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
     gen_bpt_io(s, cpu_tmp2_i32, ot);
@@ -1147,7 +1147,7 @@ static inline void gen_outs(DisasContext *s, TCGMemOp ot)
         gen_io_start();
     }
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+    gen_op_ld_v(s, ot, cpu_T0, s->A0);
 
     tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
     tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
@@ -1267,14 +1267,14 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
     if (d != OR_TMP0) {
         gen_op_mov_v_reg(ot, cpu_T0, d);
     } else if (!(s1->prefix & PREFIX_LOCK)) {
-        gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s1, ot, cpu_T0, s1->A0);
     }
     switch(op) {
     case OP_ADCL:
         gen_compute_eflags_c(s1, cpu_tmp4);
         if (s1->prefix & PREFIX_LOCK) {
             tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1);
-            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
                                         s1->mem_index, ot | MO_LE);
         } else {
             tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
@@ -1289,7 +1289,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
         if (s1->prefix & PREFIX_LOCK) {
             tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4);
             tcg_gen_neg_tl(cpu_T0, cpu_T0);
-            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
                                         s1->mem_index, ot | MO_LE);
         } else {
             tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
@@ -1301,7 +1301,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
         break;
     case OP_ADDL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
             tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
@@ -1313,7 +1313,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
     case OP_SUBL:
         if (s1->prefix & PREFIX_LOCK) {
             tcg_gen_neg_tl(cpu_T0, cpu_T1);
-            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, cpu_A0, cpu_T0,
+            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, cpu_T0,
                                         s1->mem_index, ot | MO_LE);
             tcg_gen_sub_tl(cpu_T0, s1->cc_srcT, cpu_T1);
         } else {
@@ -1327,7 +1327,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
     default:
     case OP_ANDL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_and_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+            tcg_gen_atomic_and_fetch_tl(cpu_T0, s1->A0, cpu_T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
             tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
@@ -1338,7 +1338,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
         break;
     case OP_ORL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_or_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+            tcg_gen_atomic_or_fetch_tl(cpu_T0, s1->A0, cpu_T1,
                                        s1->mem_index, ot | MO_LE);
         } else {
             tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
@@ -1349,7 +1349,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
         break;
     case OP_XORL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+            tcg_gen_atomic_xor_fetch_tl(cpu_T0, s1->A0, cpu_T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
             tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
@@ -1372,13 +1372,13 @@ static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
 {
     if (s1->prefix & PREFIX_LOCK) {
         tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1);
-        tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+        tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
                                     s1->mem_index, ot | MO_LE);
     } else {
         if (d != OR_TMP0) {
             gen_op_mov_v_reg(ot, cpu_T0, d);
         } else {
-            gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
+            gen_op_ld_v(s1, ot, cpu_T0, s1->A0);
         }
         tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1));
         gen_op_st_rm_T0_A0(s1, ot, d);
@@ -1441,7 +1441,7 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T0, s->A0);
     } else {
         gen_op_mov_v_reg(ot, cpu_T0, op1);
     }
@@ -1477,7 +1477,7 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
 
     /* load */
     if (op1 == OR_TMP0)
-        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T0, s->A0);
     else
         gen_op_mov_v_reg(ot, cpu_T0, op1);
 
@@ -1517,7 +1517,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T0, s->A0);
     } else {
         gen_op_mov_v_reg(ot, cpu_T0, op1);
     }
@@ -1603,7 +1603,7 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T0, s->A0);
     } else {
         gen_op_mov_v_reg(ot, cpu_T0, op1);
     }
@@ -1681,7 +1681,7 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 
     /* load */
     if (op1 == OR_TMP0)
-        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T0, s->A0);
     else
         gen_op_mov_v_reg(ot, cpu_T0, op1);
     
@@ -1737,7 +1737,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T0, s->A0);
     } else {
         gen_op_mov_v_reg(ot, cpu_T0, op1);
     }
@@ -2052,7 +2052,7 @@ static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
 }
 
 /* Compute the address, with a minimum number of TCG ops.  */
-static TCGv gen_lea_modrm_1(AddressParts a)
+static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
 {
     TCGv ea = NULL;
 
@@ -2060,22 +2060,22 @@ static TCGv gen_lea_modrm_1(AddressParts a)
         if (a.scale == 0) {
             ea = cpu_regs[a.index];
         } else {
-            tcg_gen_shli_tl(cpu_A0, cpu_regs[a.index], a.scale);
-            ea = cpu_A0;
+            tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
+            ea = s->A0;
         }
         if (a.base >= 0) {
-            tcg_gen_add_tl(cpu_A0, ea, cpu_regs[a.base]);
-            ea = cpu_A0;
+            tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
+            ea = s->A0;
         }
     } else if (a.base >= 0) {
         ea = cpu_regs[a.base];
     }
     if (!ea) {
-        tcg_gen_movi_tl(cpu_A0, a.disp);
-        ea = cpu_A0;
+        tcg_gen_movi_tl(s->A0, a.disp);
+        ea = s->A0;
     } else if (a.disp != 0) {
-        tcg_gen_addi_tl(cpu_A0, ea, a.disp);
-        ea = cpu_A0;
+        tcg_gen_addi_tl(s->A0, ea, a.disp);
+        ea = s->A0;
     }
 
     return ea;
@@ -2084,7 +2084,7 @@ static TCGv gen_lea_modrm_1(AddressParts a)
 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
 {
     AddressParts a = gen_lea_modrm_0(env, s, modrm);
-    TCGv ea = gen_lea_modrm_1(a);
+    TCGv ea = gen_lea_modrm_1(s, a);
     gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
 }
 
@@ -2097,7 +2097,7 @@ static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
 static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
                       TCGCond cond, TCGv_i64 bndv)
 {
-    TCGv ea = gen_lea_modrm_1(gen_lea_modrm_0(env, s, modrm));
+    TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
 
     tcg_gen_extu_tl_i64(cpu_tmp1_i64, ea);
     if (!CODE64(s)) {
@@ -2111,7 +2111,7 @@ static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
 /* used for LEA and MOV AX, mem */
 static void gen_add_A0_ds_seg(DisasContext *s)
 {
-    gen_lea_v_seg(s, s->aflag, cpu_A0, R_DS, s->override);
+    gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
 }
 
 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
@@ -2138,9 +2138,9 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
         if (is_store) {
             if (reg != OR_TMP0)
                 gen_op_mov_v_reg(ot, cpu_T0, reg);
-            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+            gen_op_st_v(s, ot, cpu_T0, s->A0);
         } else {
-            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, ot, cpu_T0, s->A0);
             if (reg != OR_TMP0)
                 gen_op_mov_reg_v(ot, reg, cpu_T0);
         }
@@ -2334,19 +2334,19 @@ static void gen_push_v(DisasContext *s, TCGv val)
     TCGMemOp d_ot = mo_pushpop(s, s->dflag);
     TCGMemOp a_ot = mo_stacksize(s);
     int size = 1 << d_ot;
-    TCGv new_esp = cpu_A0;
+    TCGv new_esp = s->A0;
 
-    tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);
+    tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
 
     if (!CODE64(s)) {
         if (s->addseg) {
             new_esp = cpu_tmp4;
-            tcg_gen_mov_tl(new_esp, cpu_A0);
+            tcg_gen_mov_tl(new_esp, s->A0);
         }
-        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
+        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
     }
 
-    gen_op_st_v(s, d_ot, val, cpu_A0);
+    gen_op_st_v(s, d_ot, val, s->A0);
     gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
 }
 
@@ -2356,7 +2356,7 @@ static TCGMemOp gen_pop_T0(DisasContext *s)
     TCGMemOp d_ot = mo_pushpop(s, s->dflag);
 
     gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
-    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
+    gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
 
     return d_ot;
 }
@@ -2379,9 +2379,9 @@ static void gen_pusha(DisasContext *s)
     int i;
 
     for (i = 0; i < 8; i++) {
-        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], (i - 8) * size);
-        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
-        gen_op_st_v(s, d_ot, cpu_regs[7 - i], cpu_A0);
+        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
+        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
+        gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
     }
 
     gen_stack_update(s, -8 * size);
@@ -2399,9 +2399,9 @@ static void gen_popa(DisasContext *s)
         if (7 - i == R_ESP) {
             continue;
         }
-        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], i * size);
-        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
-        gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
+        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
+        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
+        gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
         gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
     }
 
@@ -2417,7 +2417,7 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
     /* Push BP; compute FrameTemp into T1.  */
     tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
     gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
-    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], cpu_A0);
+    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
 
     level &= 31;
     if (level != 0) {
@@ -2425,19 +2425,19 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
 
         /* Copy level-1 pointers from the previous frame.  */
         for (i = 1; i < level; ++i) {
-            tcg_gen_subi_tl(cpu_A0, cpu_regs[R_EBP], size * i);
-            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
-            gen_op_ld_v(s, d_ot, cpu_tmp0, cpu_A0);
+            tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
+            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
+            gen_op_ld_v(s, d_ot, cpu_tmp0, s->A0);
 
-            tcg_gen_subi_tl(cpu_A0, cpu_T1, size * i);
-            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
-            gen_op_st_v(s, d_ot, cpu_tmp0, cpu_A0);
+            tcg_gen_subi_tl(s->A0, cpu_T1, size * i);
+            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
+            gen_op_st_v(s, d_ot, cpu_tmp0, s->A0);
         }
 
         /* Push the current FrameTemp as the last level.  */
-        tcg_gen_subi_tl(cpu_A0, cpu_T1, size * level);
-        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
-        gen_op_st_v(s, d_ot, cpu_T1, cpu_A0);
+        tcg_gen_subi_tl(s->A0, cpu_T1, size * level);
+        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
+        gen_op_st_v(s, d_ot, cpu_T1, s->A0);
     }
 
     /* Copy the FrameTemp value to EBP.  */
@@ -2454,7 +2454,7 @@ static void gen_leave(DisasContext *s)
     TCGMemOp a_ot = mo_stacksize(s);
 
     gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
-    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
+    gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
 
     tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
 
@@ -2633,22 +2633,22 @@ static void gen_jmp(DisasContext *s, target_ulong eip)
 
 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
 {
-    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
+    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
     tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
 }
 
 static inline void gen_stq_env_A0(DisasContext *s, int offset)
 {
     tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
-    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
+    tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
 }
 
 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
 {
     int mem_index = s->mem_index;
-    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
+    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, mem_index, MO_LEQ);
     tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
+    tcg_gen_addi_tl(cpu_tmp0, s->A0, 8);
     tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
     tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
 }
@@ -2657,8 +2657,8 @@ static inline void gen_sto_env_A0(DisasContext *s, int offset)
 {
     int mem_index = s->mem_index;
     tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
-    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
+    tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, mem_index, MO_LEQ);
+    tcg_gen_addi_tl(cpu_tmp0, s->A0, 8);
     tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
     tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
 }
@@ -3128,7 +3128,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             } else {
                 tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
                     xmm_regs[reg].ZMM_L(0)));
-                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
+                gen_op_st_v(s, MO_32, cpu_T0, s->A0);
             }
             break;
         case 0x6e: /* movd mm, ea */
@@ -3193,7 +3193,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x210: /* movss xmm, ea */
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
                 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
                 tcg_gen_movi_tl(cpu_T0, 0);
                 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
@@ -3380,7 +3380,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
                 tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
-                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
+                gen_op_st_v(s, MO_32, cpu_T0, s->A0);
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
@@ -3555,7 +3555,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 if ((b >> 8) & 1) {
                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
                 } else {
-                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
+                    gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
                     tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
                 }
                 op2_offset = offsetof(CPUX86State,xmm_t0);
@@ -3694,13 +3694,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         break;
                     case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
                     case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
                                         offsetof(ZMMReg, ZMM_L(0)));
                         break;
                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
-                        tcg_gen_qemu_ld_tl(cpu_tmp0, cpu_A0,
+                        tcg_gen_qemu_ld_tl(cpu_tmp0, s->A0,
                                            s->mem_index, MO_LEUW);
                         tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
                                         offsetof(ZMMReg, ZMM_W(0)));
@@ -3789,11 +3789,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
 
                 gen_lea_modrm(env, s, modrm);
                 if ((b & 1) == 0) {
-                    tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
+                    tcg_gen_qemu_ld_tl(cpu_T0, s->A0,
                                        s->mem_index, ot | MO_BE);
                     gen_op_mov_reg_v(ot, reg, cpu_T0);
                 } else {
-                    tcg_gen_qemu_st_tl(cpu_regs[reg], cpu_A0,
+                    tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
                                        s->mem_index, ot | MO_BE);
                 }
                 break;
@@ -3825,23 +3825,23 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                     /* Extract START, and shift the operand.
                        Shifts larger than operand size get zeros.  */
-                    tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
-                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_A0);
+                    tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
+                    tcg_gen_shr_tl(cpu_T0, cpu_T0, s->A0);
 
                     bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                     zero = tcg_const_tl(0);
-                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, cpu_A0, bound,
+                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, s->A0, bound,
                                        cpu_T0, zero);
                     tcg_temp_free(zero);
 
                     /* Extract the LEN into a mask.  Lengths larger than
                        operand size get all ones.  */
-                    tcg_gen_extract_tl(cpu_A0, cpu_regs[s->vex_v], 8, 8);
-                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound,
-                                       cpu_A0, bound);
+                    tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
+                    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
+                                       s->A0, bound);
                     tcg_temp_free(bound);
                     tcg_gen_movi_tl(cpu_T1, 1);
-                    tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_A0);
+                    tcg_gen_shl_tl(cpu_T1, cpu_T1, s->A0);
                     tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
                     tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
 
@@ -3870,9 +3870,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                        bound, bound, cpu_T1);
                     tcg_temp_free(bound);
                 }
-                tcg_gen_movi_tl(cpu_A0, -1);
-                tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T1);
-                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_A0);
+                tcg_gen_movi_tl(s->A0, -1);
+                tcg_gen_shl_tl(s->A0, s->A0, cpu_T1);
+                tcg_gen_andc_tl(cpu_T0, cpu_T0, s->A0);
                 gen_op_mov_reg_v(ot, reg, cpu_T0);
                 gen_op_update1_cc();
                 set_cc_op(s, CC_OP_BMILGB + ot);
@@ -4124,7 +4124,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     if (mod == 3) {
                         gen_op_mov_reg_v(ot, rm, cpu_T0);
                     } else {
-                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
+                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
                                            s->mem_index, MO_UB);
                     }
                     break;
@@ -4134,7 +4134,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     if (mod == 3) {
                         gen_op_mov_reg_v(ot, rm, cpu_T0);
                     } else {
-                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
+                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
                                            s->mem_index, MO_LEUW);
                     }
                     break;
@@ -4146,7 +4146,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         if (mod == 3) {
                             tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32);
                         } else {
-                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
                                                 s->mem_index, MO_LEUL);
                         }
                     } else { /* pextrq */
@@ -4157,7 +4157,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         if (mod == 3) {
                             tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64);
                         } else {
-                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
+                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0,
                                                 s->mem_index, MO_LEQ);
                         }
 #else
@@ -4171,7 +4171,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     if (mod == 3) {
                         gen_op_mov_reg_v(ot, rm, cpu_T0);
                     } else {
-                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
+                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
                                            s->mem_index, MO_LEUL);
                     }
                     break;
@@ -4179,7 +4179,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     if (mod == 3) {
                         gen_op_mov_v_reg(MO_32, cpu_T0, rm);
                     } else {
-                        tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
+                        tcg_gen_qemu_ld_tl(cpu_T0, s->A0,
                                            s->mem_index, MO_UB);
                     }
                     tcg_gen_st8_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
@@ -4191,7 +4191,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                         offsetof(CPUX86State,xmm_regs[rm]
                                                 .ZMM_L((val >> 6) & 3)));
                     } else {
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                     }
                     tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
@@ -4219,7 +4219,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         if (mod == 3) {
                             tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[rm]);
                         } else {
-                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                                 s->mem_index, MO_LEUL);
                         }
                         tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
@@ -4230,7 +4230,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         if (mod == 3) {
                             gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
                         } else {
-                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
+                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
                                                 s->mem_index, MO_LEQ);
                         }
                         tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
@@ -4360,7 +4360,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 switch (sz) {
                 case 2:
                     /* 32 bit access */
-                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
+                    gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
                     tcg_gen_st32_tl(cpu_T0, cpu_env,
                                     offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
                     break;
@@ -4426,15 +4426,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             /* maskmov : we must prepare A0 */
             if (mod != 3)
                 goto illegal_op;
-            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EDI]);
-            gen_extu(s->aflag, cpu_A0);
+            tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
+            gen_extu(s->aflag, s->A0);
             gen_add_A0_ds_seg(s);
 
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
             /* XXX: introduce a new table? */
             sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
-            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, cpu_A0);
+            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, s->A0);
             break;
         default:
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
@@ -4673,7 +4673,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
                     gen_lea_modrm(env, s, modrm);
-                    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+                    gen_op_ld_v(s, ot, cpu_T1, s->A0);
                 } else if (op == OP_XORL && rm == reg) {
                     goto xor_zero;
                 } else {
@@ -4760,7 +4760,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             /* For those below that handle locked memory, don't load here.  */
             if (!(s->prefix & PREFIX_LOCK)
                 || op != 2) {
-                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, ot, cpu_T0, s->A0);
             }
         } else {
             gen_op_mov_v_reg(ot, cpu_T0, rm);
@@ -4779,12 +4779,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     goto illegal_op;
                 }
                 tcg_gen_movi_tl(cpu_T0, ~0);
-                tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+                tcg_gen_atomic_xor_fetch_tl(cpu_T0, s->A0, cpu_T0,
                                             s->mem_index, ot | MO_LE);
             } else {
                 tcg_gen_not_tl(cpu_T0, cpu_T0);
                 if (mod != 3) {
-                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+                    gen_op_st_v(s, ot, cpu_T0, s->A0);
                 } else {
                     gen_op_mov_reg_v(ot, rm, cpu_T0);
                 }
@@ -4802,7 +4802,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 t0 = tcg_temp_local_new();
                 label1 = gen_new_label();
 
-                tcg_gen_mov_tl(a0, cpu_A0);
+                tcg_gen_mov_tl(a0, s->A0);
                 tcg_gen_mov_tl(t0, cpu_T0);
 
                 gen_set_label(label1);
@@ -4822,7 +4822,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             } else {
                 tcg_gen_neg_tl(cpu_T0, cpu_T0);
                 if (mod != 3) {
-                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+                    gen_op_st_v(s, ot, cpu_T0, s->A0);
                 } else {
                     gen_op_mov_reg_v(ot, rm, cpu_T0);
                 }
@@ -5001,7 +5001,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (mod != 3) {
             gen_lea_modrm(env, s, modrm);
             if (op >= 2 && op != 3 && op != 5)
-                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, ot, cpu_T0, s->A0);
         } else {
             gen_op_mov_v_reg(ot, cpu_T0, rm);
         }
@@ -5034,9 +5034,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_jr(s, cpu_T0);
             break;
         case 3: /* lcall Ev */
-            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+            gen_op_ld_v(s, ot, cpu_T1, s->A0);
             gen_add_A0_im(s, 1 << ot);
-            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
         do_lcall:
             if (s->pe && !s->vm86) {
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
@@ -5061,9 +5061,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_jr(s, cpu_T0);
             break;
         case 5: /* ljmp Ev */
-            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+            gen_op_ld_v(s, ot, cpu_T1, s->A0);
             gen_add_A0_im(s, 1 << ot);
-            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
         do_ljmp:
             if (s->pe && !s->vm86) {
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
@@ -5225,13 +5225,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         } else {
             gen_lea_modrm(env, s, modrm);
             if (s->prefix & PREFIX_LOCK) {
-                tcg_gen_atomic_fetch_add_tl(cpu_T1, cpu_A0, cpu_T0,
+                tcg_gen_atomic_fetch_add_tl(cpu_T1, s->A0, cpu_T0,
                                             s->mem_index, ot | MO_LE);
                 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
             } else {
-                gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+                gen_op_ld_v(s, ot, cpu_T1, s->A0);
                 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
-                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_st_v(s, ot, cpu_T0, s->A0);
             }
             gen_op_mov_reg_v(ot, reg, cpu_T1);
         }
@@ -5258,7 +5258,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     goto illegal_op;
                 }
                 gen_lea_modrm(env, s, modrm);
-                tcg_gen_atomic_cmpxchg_tl(oldv, cpu_A0, cmpv, newv,
+                tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
                                           s->mem_index, ot | MO_LE);
                 gen_op_mov_reg_v(ot, R_EAX, oldv);
             } else {
@@ -5267,7 +5267,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     gen_op_mov_v_reg(ot, oldv, rm);
                 } else {
                     gen_lea_modrm(env, s, modrm);
-                    gen_op_ld_v(s, ot, oldv, cpu_A0);
+                    gen_op_ld_v(s, ot, oldv, s->A0);
                     rm = 0; /* avoid warning */
                 }
                 gen_extu(ot, oldv);
@@ -5282,7 +5282,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                        must be before changing accumulator to ensure
                        idempotency if the store faults and the instruction
                        is restarted */
-                    gen_op_st_v(s, ot, newv, cpu_A0);
+                    gen_op_st_v(s, ot, newv, s->A0);
                     gen_op_mov_reg_v(ot, R_EAX, oldv);
                 }
             }
@@ -5306,9 +5306,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             gen_lea_modrm(env, s, modrm);
             if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) {
-                gen_helper_cmpxchg16b(cpu_env, cpu_A0);
+                gen_helper_cmpxchg16b(cpu_env, s->A0);
             } else {
-                gen_helper_cmpxchg16b_unlocked(cpu_env, cpu_A0);
+                gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
             }
         } else
 #endif        
@@ -5317,9 +5317,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             gen_lea_modrm(env, s, modrm);
             if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) {
-                gen_helper_cmpxchg8b(cpu_env, cpu_A0);
+                gen_helper_cmpxchg8b(cpu_env, s->A0);
             } else {
-                gen_helper_cmpxchg8b_unlocked(cpu_env, cpu_A0);
+                gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
             }
         }
         set_cc_op(s, CC_OP_EFLAGS);
@@ -5453,7 +5453,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         val = insn_get(env, s, ot);
         tcg_gen_movi_tl(cpu_T0, val);
         if (mod != 3) {
-            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+            gen_op_st_v(s, ot, cpu_T0, s->A0);
         } else {
             gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T0);
         }
@@ -5540,7 +5540,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_op_mov_reg_v(d_ot, reg, cpu_T0);
             } else {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, s_ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, s_ot, cpu_T0, s->A0);
                 gen_op_mov_reg_v(d_ot, reg, cpu_T0);
             }
         }
@@ -5554,9 +5554,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         reg = ((modrm >> 3) & 7) | rex_r;
         {
             AddressParts a = gen_lea_modrm_0(env, s, modrm);
-            TCGv ea = gen_lea_modrm_1(a);
+            TCGv ea = gen_lea_modrm_1(s, a);
             gen_lea_v_seg(s, s->aflag, ea, -1, -1);
-            gen_op_mov_reg_v(dflag, reg, cpu_A0);
+            gen_op_mov_reg_v(dflag, reg, s->A0);
         }
         break;
 
@@ -5578,24 +5578,24 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 offset_addr = insn_get(env, s, s->aflag);
                 break;
             }
-            tcg_gen_movi_tl(cpu_A0, offset_addr);
+            tcg_gen_movi_tl(s->A0, offset_addr);
             gen_add_A0_ds_seg(s);
             if ((b & 2) == 0) {
-                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, ot, cpu_T0, s->A0);
                 gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
             } else {
                 gen_op_mov_v_reg(ot, cpu_T0, R_EAX);
-                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_st_v(s, ot, cpu_T0, s->A0);
             }
         }
         break;
     case 0xd7: /* xlat */
-        tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EBX]);
+        tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
         tcg_gen_ext8u_tl(cpu_T0, cpu_regs[R_EAX]);
-        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T0);
-        gen_extu(s->aflag, cpu_A0);
+        tcg_gen_add_tl(s->A0, s->A0, cpu_T0);
+        gen_extu(s->aflag, s->A0);
         gen_add_A0_ds_seg(s);
-        gen_op_ld_v(s, MO_8, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, MO_8, cpu_T0, s->A0);
         gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
         break;
     case 0xb0 ... 0xb7: /* mov R, Ib */
@@ -5646,7 +5646,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_lea_modrm(env, s, modrm);
             gen_op_mov_v_reg(ot, cpu_T0, reg);
             /* for xchg, lock is implicit */
-            tcg_gen_atomic_xchg_tl(cpu_T1, cpu_A0, cpu_T0,
+            tcg_gen_atomic_xchg_tl(cpu_T1, s->A0, cpu_T0,
                                    s->mem_index, ot | MO_LE);
             gen_op_mov_reg_v(ot, reg, cpu_T1);
         }
@@ -5675,10 +5675,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (mod == 3)
             goto illegal_op;
         gen_lea_modrm(env, s, modrm);
-        gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T1, s->A0);
         gen_add_A0_im(s, 1 << ot);
         /* load the segment first to handle exceptions properly */
-        gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
         gen_movl_seg_T0(s, op);
         /* then put the data */
         gen_op_mov_reg_v(ot, reg, cpu_T1);
@@ -5798,23 +5798,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 
                     switch(op >> 4) {
                     case 0:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 1:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 2:
-                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
+                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
                         gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64);
                         break;
                     case 3:
                     default:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LESW);
                         gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
                         break;
@@ -5837,23 +5837,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 case 0:
                     switch(op >> 4) {
                     case 0:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 1:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 2:
-                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
+                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
                         gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64);
                         break;
                     case 3:
                     default:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LESW);
                         gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
                         break;
@@ -5864,18 +5864,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     switch(op >> 4) {
                     case 1:
                         gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         break;
                     case 2:
                         gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env);
-                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
+                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
                         break;
                     case 3:
                     default:
                         gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUW);
                         break;
                     }
@@ -5885,23 +5885,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     switch(op >> 4) {
                     case 0:
                         gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         break;
                     case 1:
                         gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         break;
                     case 2:
                         gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env);
-                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
+                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
                         break;
                     case 3:
                     default:
                         gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUW);
                         break;
                     }
@@ -5911,53 +5911,53 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 }
                 break;
             case 0x0c: /* fldenv mem */
-                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x0d: /* fldcw mem */
-                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
                                     s->mem_index, MO_LEUW);
                 gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
                 break;
             case 0x0e: /* fnstenv mem */
-                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x0f: /* fnstcw mem */
                 gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
-                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
                                     s->mem_index, MO_LEUW);
                 break;
             case 0x1d: /* fldt mem */
-                gen_helper_fldt_ST0(cpu_env, cpu_A0);
+                gen_helper_fldt_ST0(cpu_env, s->A0);
                 break;
             case 0x1f: /* fstpt mem */
-                gen_helper_fstt_ST0(cpu_env, cpu_A0);
+                gen_helper_fstt_ST0(cpu_env, s->A0);
                 gen_helper_fpop(cpu_env);
                 break;
             case 0x2c: /* frstor mem */
-                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x2e: /* fnsave mem */
-                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x2f: /* fnstsw mem */
                 gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
-                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
                                     s->mem_index, MO_LEUW);
                 break;
             case 0x3c: /* fbld */
-                gen_helper_fbld_ST0(cpu_env, cpu_A0);
+                gen_helper_fbld_ST0(cpu_env, s->A0);
                 break;
             case 0x3e: /* fbstp */
-                gen_helper_fbst_ST0(cpu_env, cpu_A0);
+                gen_helper_fbst_ST0(cpu_env, s->A0);
                 gen_helper_fpop(cpu_env);
                 break;
             case 0x3d: /* fildll */
-                tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
+                tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
                 gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64);
                 break;
             case 0x3f: /* fistpll */
                 gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env);
-                tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
+                tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
                 gen_helper_fpop(cpu_env);
                 break;
             default:
@@ -6471,13 +6471,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         } else {
             gen_stack_A0(s);
             /* pop offset */
-            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, dflag, cpu_T0, s->A0);
             /* NOTE: keeping EIP updated is not a problem in case of
                exception */
             gen_op_jmp_v(cpu_T0);
             /* pop selector */
             gen_add_A0_im(s, 1 << dflag);
-            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, dflag, cpu_T0, s->A0);
             gen_op_movl_seg_T0_vm(R_CS);
             /* add stack offset */
             gen_stack_update(s, val + (2 << dflag));
@@ -6732,7 +6732,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             s->rip_offset = 1;
             gen_lea_modrm(env, s, modrm);
             if (!(s->prefix & PREFIX_LOCK)) {
-                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, ot, cpu_T0, s->A0);
             }
         } else {
             gen_op_mov_v_reg(ot, cpu_T0, rm);
@@ -6768,10 +6768,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_exts(ot, cpu_T1);
             tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
             tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
-            tcg_gen_add_tl(cpu_A0, gen_lea_modrm_1(a), cpu_tmp0);
-            gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
+            tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), cpu_tmp0);
+            gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
             if (!(s->prefix & PREFIX_LOCK)) {
-                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, ot, cpu_T0, s->A0);
             }
         } else {
             gen_op_mov_v_reg(ot, cpu_T0, rm);
@@ -6785,20 +6785,20 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             case 0: /* bt */
                 /* Needs no atomic ops; we surpressed the normal
                    memory load for LOCK above so do it now.  */
-                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, ot, cpu_T0, s->A0);
                 break;
             case 1: /* bts */
-                tcg_gen_atomic_fetch_or_tl(cpu_T0, cpu_A0, cpu_tmp0,
+                tcg_gen_atomic_fetch_or_tl(cpu_T0, s->A0, cpu_tmp0,
                                            s->mem_index, ot | MO_LE);
                 break;
             case 2: /* btr */
                 tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
-                tcg_gen_atomic_fetch_and_tl(cpu_T0, cpu_A0, cpu_tmp0,
+                tcg_gen_atomic_fetch_and_tl(cpu_T0, s->A0, cpu_tmp0,
                                             s->mem_index, ot | MO_LE);
                 break;
             default:
             case 3: /* btc */
-                tcg_gen_atomic_fetch_xor_tl(cpu_T0, cpu_A0, cpu_tmp0,
+                tcg_gen_atomic_fetch_xor_tl(cpu_T0, s->A0, cpu_tmp0,
                                             s->mem_index, ot | MO_LE);
                 break;
             }
@@ -6822,7 +6822,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             if (op != 0) {
                 if (mod != 3) {
-                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+                    gen_op_st_v(s, ot, cpu_T0, s->A0);
                 } else {
                     gen_op_mov_reg_v(ot, rm, cpu_T0);
                 }
@@ -7051,9 +7051,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_lea_modrm(env, s, modrm);
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
         if (ot == MO_16) {
-            gen_helper_boundw(cpu_env, cpu_A0, cpu_tmp2_i32);
+            gen_helper_boundw(cpu_env, s->A0, cpu_tmp2_i32);
         } else {
-            gen_helper_boundl(cpu_env, cpu_A0, cpu_tmp2_i32);
+            gen_helper_boundl(cpu_env, s->A0, cpu_tmp2_i32);
         }
         break;
     case 0x1c8 ... 0x1cf: /* bswap reg */
@@ -7293,13 +7293,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_lea_modrm(env, s, modrm);
             tcg_gen_ld32u_tl(cpu_T0,
                              cpu_env, offsetof(CPUX86State, gdt.limit));
-            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
+            gen_op_st_v(s, MO_16, cpu_T0, s->A0);
             gen_add_A0_im(s, 2);
             tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
             if (dflag == MO_16) {
                 tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
             }
-            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
+            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
             break;
 
         case 0xc8: /* monitor */
@@ -7308,10 +7308,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EAX]);
-            gen_extu(s->aflag, cpu_A0);
+            tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
+            gen_extu(s->aflag, s->A0);
             gen_add_A0_ds_seg(s);
-            gen_helper_monitor(cpu_env, cpu_A0);
+            gen_helper_monitor(cpu_env, s->A0);
             break;
 
         case 0xc9: /* mwait */
@@ -7348,13 +7348,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
             gen_lea_modrm(env, s, modrm);
             tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit));
-            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
+            gen_op_st_v(s, MO_16, cpu_T0, s->A0);
             gen_add_A0_im(s, 2);
             tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
             if (dflag == MO_16) {
                 tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
             }
-            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
+            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
             break;
 
         case 0xd0: /* xgetbv */
@@ -7498,9 +7498,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
             gen_lea_modrm(env, s, modrm);
-            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
+            gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
             gen_add_A0_im(s, 2);
-            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
             if (dflag == MO_16) {
                 tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
             }
@@ -7515,9 +7515,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
             gen_lea_modrm(env, s, modrm);
-            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
+            gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
             gen_add_A0_im(s, 2);
-            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
             if (dflag == MO_16) {
                 tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
             }
@@ -7573,7 +7573,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_lea_modrm(env, s, modrm);
-            gen_helper_invlpg(cpu_env, cpu_A0);
+            gen_helper_invlpg(cpu_env, s->A0);
             gen_jmp_im(s->pc - s->cs_base);
             gen_eob(s);
             break;
@@ -7646,7 +7646,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_op_mov_reg_v(d_ot, reg, cpu_T0);
             } else {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, s->A0);
                 gen_op_mov_reg_v(d_ot, reg, cpu_T0);
             }
         } else
@@ -7667,9 +7667,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             rm = modrm & 7;
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, ot, t0, cpu_A0);
+                gen_op_ld_v(s, ot, t0, s->A0);
                 a0 = tcg_temp_local_new();
-                tcg_gen_mov_tl(a0, cpu_A0);
+                tcg_gen_mov_tl(a0, s->A0);
             } else {
                 gen_op_mov_v_reg(ot, t0, rm);
                 a0 = NULL;
@@ -7785,16 +7785,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 } else {
                     gen_lea_modrm(env, s, modrm);
                     if (CODE64(s)) {
-                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
+                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
                                             s->mem_index, MO_LEQ);
-                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
-                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
+                        tcg_gen_addi_tl(s->A0, s->A0, 8);
+                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
                                             s->mem_index, MO_LEQ);
                     } else {
-                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
+                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
                                             s->mem_index, MO_LEUL);
-                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
-                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
+                        tcg_gen_addi_tl(s->A0, s->A0, 4);
+                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
                                             s->mem_index, MO_LEUL);
                     }
                     /* bnd registers are now in-use */
@@ -7810,22 +7810,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     goto illegal_op;
                 }
                 if (a.base >= 0) {
-                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
+                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
                 } else {
-                    tcg_gen_movi_tl(cpu_A0, 0);
+                    tcg_gen_movi_tl(s->A0, 0);
                 }
-                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
+                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
                 if (a.index >= 0) {
                     tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
                 } else {
                     tcg_gen_movi_tl(cpu_T0, 0);
                 }
                 if (CODE64(s)) {
-                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, cpu_A0, cpu_T0);
+                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, cpu_T0);
                     tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
                                    offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
                 } else {
-                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, cpu_A0, cpu_T0);
+                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, cpu_T0);
                     tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
                     tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
                 }
@@ -7859,11 +7859,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     /* rip-relative generates #ud */
                     goto illegal_op;
                 }
-                tcg_gen_not_tl(cpu_A0, gen_lea_modrm_1(a));
+                tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
                 if (!CODE64(s)) {
-                    tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
+                    tcg_gen_ext32u_tl(s->A0, s->A0);
                 }
-                tcg_gen_extu_tl_i64(cpu_bndu[reg], cpu_A0);
+                tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
                 /* bnd registers are now in-use */
                 gen_set_hflag(s, HF_MPX_IU_MASK);
                 break;
@@ -7892,16 +7892,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 } else {
                     gen_lea_modrm(env, s, modrm);
                     if (CODE64(s)) {
-                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
+                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
                                             s->mem_index, MO_LEQ);
-                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
-                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
+                        tcg_gen_addi_tl(s->A0, s->A0, 8);
+                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
                                             s->mem_index, MO_LEQ);
                     } else {
-                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
+                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
                                             s->mem_index, MO_LEUL);
-                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
-                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
+                        tcg_gen_addi_tl(s->A0, s->A0, 4);
+                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
                                             s->mem_index, MO_LEUL);
                     }
                 }
@@ -7915,21 +7915,21 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     goto illegal_op;
                 }
                 if (a.base >= 0) {
-                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
+                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
                 } else {
-                    tcg_gen_movi_tl(cpu_A0, 0);
+                    tcg_gen_movi_tl(s->A0, 0);
                 }
-                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
+                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
                 if (a.index >= 0) {
                     tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
                 } else {
                     tcg_gen_movi_tl(cpu_T0, 0);
                 }
                 if (CODE64(s)) {
-                    gen_helper_bndstx64(cpu_env, cpu_A0, cpu_T0,
+                    gen_helper_bndstx64(cpu_env, s->A0, cpu_T0,
                                         cpu_bndl[reg], cpu_bndu[reg]);
                 } else {
-                    gen_helper_bndstx32(cpu_env, cpu_A0, cpu_T0,
+                    gen_helper_bndstx32(cpu_env, s->A0, cpu_T0,
                                         cpu_bndl[reg], cpu_bndu[reg]);
                 }
             }
@@ -8069,7 +8069,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_lea_modrm(env, s, modrm);
-            gen_helper_fxsave(cpu_env, cpu_A0);
+            gen_helper_fxsave(cpu_env, s->A0);
             break;
 
         CASE_MODRM_MEM_OP(1): /* fxrstor */
@@ -8082,7 +8082,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_lea_modrm(env, s, modrm);
-            gen_helper_fxrstor(cpu_env, cpu_A0);
+            gen_helper_fxrstor(cpu_env, s->A0);
             break;
 
         CASE_MODRM_MEM_OP(2): /* ldmxcsr */
@@ -8094,7 +8094,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, s->mem_index, MO_LEUL);
+            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0, s->mem_index, MO_LEUL);
             gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
             break;
 
@@ -8108,7 +8108,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             gen_lea_modrm(env, s, modrm);
             tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, mxcsr));
-            gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
+            gen_op_st_v(s, MO_32, cpu_T0, s->A0);
             break;
 
         CASE_MODRM_MEM_OP(4): /* xsave */
@@ -8120,7 +8120,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_lea_modrm(env, s, modrm);
             tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
-            gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64);
+            gen_helper_xsave(cpu_env, s->A0, cpu_tmp1_i64);
             break;
 
         CASE_MODRM_MEM_OP(5): /* xrstor */
@@ -8132,7 +8132,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_lea_modrm(env, s, modrm);
             tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
-            gen_helper_xrstor(cpu_env, cpu_A0, cpu_tmp1_i64);
+            gen_helper_xrstor(cpu_env, s->A0, cpu_tmp1_i64);
             /* XRSTOR is how MPX is enabled, which changes how
                we translate.  Thus we need to end the TB.  */
             gen_update_cc_op(s);
@@ -8160,7 +8160,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_lea_modrm(env, s, modrm);
                 tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
                                       cpu_regs[R_EDX]);
-                gen_helper_xsaveopt(cpu_env, cpu_A0, cpu_tmp1_i64);
+                gen_helper_xsaveopt(cpu_env, s->A0, cpu_tmp1_i64);
             }
             break;
 
@@ -8458,7 +8458,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
 
     cpu_T0 = tcg_temp_new();
     cpu_T1 = tcg_temp_new();
-    cpu_A0 = tcg_temp_new();
+    dc->A0 = tcg_temp_new();
 
     cpu_tmp0 = tcg_temp_new();
     cpu_tmp1_i64 = tcg_temp_new_i64();
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 03/13] target/i386: move cpu_T0 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 01/13] target/i386: move cpu_cc_srcT to DisasContext Emilio G. Cota
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 02/13] target/i386: move cpu_A0 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:47   ` Richard Henderson
  2018-09-13 14:25   ` Alex Bennée
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 04/13] target/i386: move cpu_T1 " Emilio G. Cota
                   ` (10 subsequent siblings)
  13 siblings, 2 replies; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 1174 ++++++++++++++++++++-------------------
 1 file changed, 594 insertions(+), 580 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index c6b1baab9d..73fd7e5b9a 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -79,7 +79,7 @@ static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
 /* local temps */
-static TCGv cpu_T0, cpu_T1;
+static TCGv cpu_T1;
 /* local register indexes (only used inside old micro ops) */
 static TCGv cpu_tmp0, cpu_tmp4;
 static TCGv_ptr cpu_ptr0, cpu_ptr1;
@@ -138,6 +138,7 @@ typedef struct DisasContext {
     /* TCG local temps */
     TCGv cc_srcT;
     TCGv A0;
+    TCGv T0;
 
     sigjmp_buf jmpbuf;
 } DisasContext;
@@ -412,9 +413,9 @@ static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
     gen_op_mov_reg_v(size, reg, cpu_tmp0);
 }
 
-static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
+static inline void gen_op_add_reg_T0(DisasContext *s, TCGMemOp size, int reg)
 {
-    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T0);
+    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], s->T0);
     gen_op_mov_reg_v(size, reg, cpu_tmp0);
 }
 
@@ -431,9 +432,9 @@ static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
 static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
 {
     if (d == OR_TMP0) {
-        gen_op_st_v(s, idx, cpu_T0, s->A0);
+        gen_op_st_v(s, idx, s->T0, s->A0);
     } else {
-        gen_op_mov_reg_v(idx, d, cpu_T0);
+        gen_op_mov_reg_v(idx, d, s->T0);
     }
 }
 
@@ -509,10 +510,10 @@ static inline void gen_string_movl_A0_EDI(DisasContext *s)
     gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
 }
 
-static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
+static inline void gen_op_movl_T0_Dshift(DisasContext *s, TCGMemOp ot)
 {
-    tcg_gen_ld32s_tl(cpu_T0, cpu_env, offsetof(CPUX86State, df));
-    tcg_gen_shli_tl(cpu_T0, cpu_T0, ot);
+    tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
+    tcg_gen_shli_tl(s->T0, s->T0, ot);
 };
 
 static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
@@ -610,7 +611,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
     target_ulong next_eip;
 
     if (s->pe && (s->cpl > s->iopl || s->vm86)) {
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
         switch (ot) {
         case MO_8:
             gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
@@ -630,7 +631,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
         gen_jmp_im(cur_eip);
         svm_flags |= (1 << (4 + ot));
         next_eip = s->pc - s->cs_base;
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
         gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
                                 tcg_const_i32(svm_flags),
                                 tcg_const_i32(next_eip - cur_eip));
@@ -640,41 +641,41 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
 static inline void gen_movs(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_v(s, ot, cpu_T0, s->A0);
+    gen_op_ld_v(s, ot, s->T0, s->A0);
     gen_string_movl_A0_EDI(s);
-    gen_op_st_v(s, ot, cpu_T0, s->A0);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_ESI);
-    gen_op_add_reg_T0(s->aflag, R_EDI);
+    gen_op_st_v(s, ot, s->T0, s->A0);
+    gen_op_movl_T0_Dshift(s, ot);
+    gen_op_add_reg_T0(s, s->aflag, R_ESI);
+    gen_op_add_reg_T0(s, s->aflag, R_EDI);
 }
 
-static void gen_op_update1_cc(void)
+static void gen_op_update1_cc(DisasContext *s)
 {
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
 }
 
-static void gen_op_update2_cc(void)
+static void gen_op_update2_cc(DisasContext *s)
 {
     tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
 }
 
-static void gen_op_update3_cc(TCGv reg)
+static void gen_op_update3_cc(DisasContext *s, TCGv reg)
 {
     tcg_gen_mov_tl(cpu_cc_src2, reg);
     tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
 }
 
-static inline void gen_op_testl_T0_T1_cc(void)
+static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
 {
-    tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
+    tcg_gen_and_tl(cpu_cc_dst, s->T0, cpu_T1);
 }
 
 static void gen_op_update_neg_cc(DisasContext *s)
 {
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-    tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
+    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+    tcg_gen_neg_tl(cpu_cc_src, s->T0);
     tcg_gen_movi_tl(s->cc_srcT, 0);
 }
 
@@ -1022,11 +1023,11 @@ static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
    value 'b'. In the fast case, T0 is guaranted not to be used. */
 static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
 {
-    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
+    CCPrepare cc = gen_prepare_cc(s, b, s->T0);
 
     if (cc.mask != -1) {
-        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
-        cc.reg = cpu_T0;
+        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
+        cc.reg = s->T0;
     }
     if (cc.use_reg2) {
         tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
@@ -1040,12 +1041,12 @@ static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
    A translation block must end soon.  */
 static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
 {
-    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
+    CCPrepare cc = gen_prepare_cc(s, b, s->T0);
 
     gen_update_cc_op(s);
     if (cc.mask != -1) {
-        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
-        cc.reg = cpu_T0;
+        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
+        cc.reg = s->T0;
     }
     set_cc_op(s, CC_OP_DYNAMIC);
     if (cc.use_reg2) {
@@ -1070,20 +1071,20 @@ static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
 
 static inline void gen_stos(DisasContext *s, TCGMemOp ot)
 {
-    gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
+    gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
     gen_string_movl_A0_EDI(s);
-    gen_op_st_v(s, ot, cpu_T0, s->A0);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_EDI);
+    gen_op_st_v(s, ot, s->T0, s->A0);
+    gen_op_movl_T0_Dshift(s, ot);
+    gen_op_add_reg_T0(s, s->aflag, R_EDI);
 }
 
 static inline void gen_lods(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_v(s, ot, cpu_T0, s->A0);
-    gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_ESI);
+    gen_op_ld_v(s, ot, s->T0, s->A0);
+    gen_op_mov_reg_v(ot, R_EAX, s->T0);
+    gen_op_movl_T0_Dshift(s, ot);
+    gen_op_add_reg_T0(s, s->aflag, R_ESI);
 }
 
 static inline void gen_scas(DisasContext *s, TCGMemOp ot)
@@ -1091,8 +1092,8 @@ static inline void gen_scas(DisasContext *s, TCGMemOp ot)
     gen_string_movl_A0_EDI(s);
     gen_op_ld_v(s, ot, cpu_T1, s->A0);
     gen_op(s, OP_CMPL, ot, R_EAX);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_EDI);
+    gen_op_movl_T0_Dshift(s, ot);
+    gen_op_add_reg_T0(s, s->aflag, R_EDI);
 }
 
 static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
@@ -1101,9 +1102,9 @@ static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
     gen_op_ld_v(s, ot, cpu_T1, s->A0);
     gen_string_movl_A0_ESI(s);
     gen_op(s, OP_CMPL, ot, OR_TMP0);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_ESI);
-    gen_op_add_reg_T0(s->aflag, R_EDI);
+    gen_op_movl_T0_Dshift(s, ot);
+    gen_op_add_reg_T0(s, s->aflag, R_ESI);
+    gen_op_add_reg_T0(s, s->aflag, R_EDI);
 }
 
 static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
@@ -1127,14 +1128,14 @@ static inline void gen_ins(DisasContext *s, TCGMemOp ot)
     gen_string_movl_A0_EDI(s);
     /* Note: we must do this dummy write first to be restartable in
        case of page fault. */
-    tcg_gen_movi_tl(cpu_T0, 0);
-    gen_op_st_v(s, ot, cpu_T0, s->A0);
+    tcg_gen_movi_tl(s->T0, 0);
+    gen_op_st_v(s, ot, s->T0, s->A0);
     tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
     tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
-    gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
-    gen_op_st_v(s, ot, cpu_T0, s->A0);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_EDI);
+    gen_helper_in_func(ot, s->T0, cpu_tmp2_i32);
+    gen_op_st_v(s, ot, s->T0, s->A0);
+    gen_op_movl_T0_Dshift(s, ot);
+    gen_op_add_reg_T0(s, s->aflag, R_EDI);
     gen_bpt_io(s, cpu_tmp2_i32, ot);
     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
         gen_io_end();
@@ -1147,14 +1148,14 @@ static inline void gen_outs(DisasContext *s, TCGMemOp ot)
         gen_io_start();
     }
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_v(s, ot, cpu_T0, s->A0);
+    gen_op_ld_v(s, ot, s->T0, s->A0);
 
     tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
     tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
-    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T0);
+    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T0);
     gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_ESI);
+    gen_op_movl_T0_Dshift(s, ot);
+    gen_op_add_reg_T0(s, s->aflag, R_ESI);
     gen_bpt_io(s, cpu_tmp2_i32, ot);
     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
         gen_io_end();
@@ -1265,103 +1266,103 @@ static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
 static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
 {
     if (d != OR_TMP0) {
-        gen_op_mov_v_reg(ot, cpu_T0, d);
+        gen_op_mov_v_reg(ot, s1->T0, d);
     } else if (!(s1->prefix & PREFIX_LOCK)) {
-        gen_op_ld_v(s1, ot, cpu_T0, s1->A0);
+        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
     }
     switch(op) {
     case OP_ADCL:
         gen_compute_eflags_c(s1, cpu_tmp4);
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1);
-            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
+            tcg_gen_add_tl(s1->T0, cpu_tmp4, cpu_T1);
+            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
-            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
+            tcg_gen_add_tl(s1->T0, s1->T0, cpu_T1);
+            tcg_gen_add_tl(s1->T0, s1->T0, cpu_tmp4);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update3_cc(cpu_tmp4);
+        gen_op_update3_cc(s1, cpu_tmp4);
         set_cc_op(s1, CC_OP_ADCB + ot);
         break;
     case OP_SBBL:
         gen_compute_eflags_c(s1, cpu_tmp4);
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4);
-            tcg_gen_neg_tl(cpu_T0, cpu_T0);
-            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
+            tcg_gen_add_tl(s1->T0, cpu_T1, cpu_tmp4);
+            tcg_gen_neg_tl(s1->T0, s1->T0);
+            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
-            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
+            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_T1);
+            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_tmp4);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update3_cc(cpu_tmp4);
+        gen_op_update3_cc(s1, cpu_tmp4);
         set_cc_op(s1, CC_OP_SBBB + ot);
         break;
     case OP_ADDL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T1,
+            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, cpu_T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_add_tl(s1->T0, s1->T0, cpu_T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update2_cc();
+        gen_op_update2_cc(s1);
         set_cc_op(s1, CC_OP_ADDB + ot);
         break;
     case OP_SUBL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_neg_tl(cpu_T0, cpu_T1);
-            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, cpu_T0,
+            tcg_gen_neg_tl(s1->T0, cpu_T1);
+            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                         s1->mem_index, ot | MO_LE);
-            tcg_gen_sub_tl(cpu_T0, s1->cc_srcT, cpu_T1);
+            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, cpu_T1);
         } else {
-            tcg_gen_mov_tl(s1->cc_srcT, cpu_T0);
-            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
+            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update2_cc();
+        gen_op_update2_cc(s1);
         set_cc_op(s1, CC_OP_SUBB + ot);
         break;
     default:
     case OP_ANDL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_and_fetch_tl(cpu_T0, s1->A0, cpu_T1,
+            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, cpu_T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_and_tl(s1->T0, s1->T0, cpu_T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update1_cc();
+        gen_op_update1_cc(s1);
         set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_ORL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_or_fetch_tl(cpu_T0, s1->A0, cpu_T1,
+            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, cpu_T1,
                                        s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_or_tl(s1->T0, s1->T0, cpu_T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update1_cc();
+        gen_op_update1_cc(s1);
         set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_XORL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_xor_fetch_tl(cpu_T0, s1->A0, cpu_T1,
+            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, cpu_T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_xor_tl(s1->T0, s1->T0, cpu_T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update1_cc();
+        gen_op_update1_cc(s1);
         set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_CMPL:
         tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
-        tcg_gen_mov_tl(s1->cc_srcT, cpu_T0);
-        tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
+        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
+        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, cpu_T1);
         set_cc_op(s1, CC_OP_SUBB + ot);
         break;
     }
@@ -1371,21 +1372,21 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
 static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
 {
     if (s1->prefix & PREFIX_LOCK) {
-        tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1);
-        tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
+        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
+        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                     s1->mem_index, ot | MO_LE);
     } else {
         if (d != OR_TMP0) {
-            gen_op_mov_v_reg(ot, cpu_T0, d);
+            gen_op_mov_v_reg(ot, s1->T0, d);
         } else {
-            gen_op_ld_v(s1, ot, cpu_T0, s1->A0);
+            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
         }
-        tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1));
+        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
         gen_op_st_rm_T0_A0(s1, ot, d);
     }
 
     gen_compute_eflags_c(s1, cpu_cc_src);
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
     set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
 }
 
@@ -1441,9 +1442,9 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, s->A0);
+        gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, cpu_T0, op1);
+        gen_op_mov_v_reg(ot, s->T0, op1);
     }
 
     tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
@@ -1451,23 +1452,23 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 
     if (is_right) {
         if (is_arith) {
-            gen_exts(ot, cpu_T0);
-            tcg_gen_sar_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
-            tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
+            gen_exts(ot, s->T0);
+            tcg_gen_sar_tl(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_sar_tl(s->T0, s->T0, cpu_T1);
         } else {
-            gen_extu(ot, cpu_T0);
-            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
-            tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
+            gen_extu(ot, s->T0);
+            tcg_gen_shr_tl(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_shr_tl(s->T0, s->T0, cpu_T1);
         }
     } else {
-        tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
-        tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
+        tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
+        tcg_gen_shl_tl(s->T0, s->T0, cpu_T1);
     }
 
     /* store */
     gen_op_st_rm_T0_A0(s, ot, op1);
 
-    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, cpu_T1, is_right);
+    gen_shift_flags(s, ot, s->T0, cpu_tmp0, cpu_T1, is_right);
 }
 
 static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
@@ -1477,25 +1478,25 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
 
     /* load */
     if (op1 == OR_TMP0)
-        gen_op_ld_v(s, ot, cpu_T0, s->A0);
+        gen_op_ld_v(s, ot, s->T0, s->A0);
     else
-        gen_op_mov_v_reg(ot, cpu_T0, op1);
+        gen_op_mov_v_reg(ot, s->T0, op1);
 
     op2 &= mask;
     if (op2 != 0) {
         if (is_right) {
             if (is_arith) {
-                gen_exts(ot, cpu_T0);
-                tcg_gen_sari_tl(cpu_tmp4, cpu_T0, op2 - 1);
-                tcg_gen_sari_tl(cpu_T0, cpu_T0, op2);
+                gen_exts(ot, s->T0);
+                tcg_gen_sari_tl(cpu_tmp4, s->T0, op2 - 1);
+                tcg_gen_sari_tl(s->T0, s->T0, op2);
             } else {
-                gen_extu(ot, cpu_T0);
-                tcg_gen_shri_tl(cpu_tmp4, cpu_T0, op2 - 1);
-                tcg_gen_shri_tl(cpu_T0, cpu_T0, op2);
+                gen_extu(ot, s->T0);
+                tcg_gen_shri_tl(cpu_tmp4, s->T0, op2 - 1);
+                tcg_gen_shri_tl(s->T0, s->T0, op2);
             }
         } else {
-            tcg_gen_shli_tl(cpu_tmp4, cpu_T0, op2 - 1);
-            tcg_gen_shli_tl(cpu_T0, cpu_T0, op2);
+            tcg_gen_shli_tl(cpu_tmp4, s->T0, op2 - 1);
+            tcg_gen_shli_tl(s->T0, s->T0, op2);
         }
     }
 
@@ -1505,7 +1506,7 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
     /* update eflags if non zero shift */
     if (op2 != 0) {
         tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
-        tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+        tcg_gen_mov_tl(cpu_cc_dst, s->T0);
         set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
     }
 }
@@ -1517,9 +1518,9 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, s->A0);
+        gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, cpu_T0, op1);
+        gen_op_mov_v_reg(ot, s->T0, op1);
     }
 
     tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
@@ -1527,31 +1528,31 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
     switch (ot) {
     case MO_8:
         /* Replicate the 8-bit input so that a 32-bit rotate works.  */
-        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
-        tcg_gen_muli_tl(cpu_T0, cpu_T0, 0x01010101);
+        tcg_gen_ext8u_tl(s->T0, s->T0);
+        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
         goto do_long;
     case MO_16:
         /* Replicate the 16-bit input so that a 32-bit rotate works.  */
-        tcg_gen_deposit_tl(cpu_T0, cpu_T0, cpu_T0, 16, 16);
+        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
         goto do_long;
     do_long:
 #ifdef TARGET_X86_64
     case MO_32:
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
         if (is_right) {
             tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
         } else {
             tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
         }
-        tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
+        tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
         break;
 #endif
     default:
         if (is_right) {
-            tcg_gen_rotr_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_rotr_tl(s->T0, s->T0, cpu_T1);
         } else {
-            tcg_gen_rotl_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_rotl_tl(s->T0, s->T0, cpu_T1);
         }
         break;
     }
@@ -1567,12 +1568,12 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
        since we've computed the flags into CC_SRC, these variables are
        currently dead.  */
     if (is_right) {
-        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
-        tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
+        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
+        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
         tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
     } else {
-        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
-        tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
+        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
+        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
     }
     tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
     tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
@@ -1603,9 +1604,9 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, s->A0);
+        gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, cpu_T0, op1);
+        gen_op_mov_v_reg(ot, s->T0, op1);
     }
 
     op2 &= mask;
@@ -1613,20 +1614,20 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
         switch (ot) {
 #ifdef TARGET_X86_64
         case MO_32:
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
             if (is_right) {
                 tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
             } else {
                 tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
             }
-            tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
+            tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
             break;
 #endif
         default:
             if (is_right) {
-                tcg_gen_rotri_tl(cpu_T0, cpu_T0, op2);
+                tcg_gen_rotri_tl(s->T0, s->T0, op2);
             } else {
-                tcg_gen_rotli_tl(cpu_T0, cpu_T0, op2);
+                tcg_gen_rotli_tl(s->T0, s->T0, op2);
             }
             break;
         case MO_8:
@@ -1639,10 +1640,10 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
             if (is_right) {
                 shift = mask + 1 - shift;
             }
-            gen_extu(ot, cpu_T0);
-            tcg_gen_shli_tl(cpu_tmp0, cpu_T0, shift);
-            tcg_gen_shri_tl(cpu_T0, cpu_T0, mask + 1 - shift);
-            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
+            gen_extu(ot, s->T0);
+            tcg_gen_shli_tl(cpu_tmp0, s->T0, shift);
+            tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
+            tcg_gen_or_tl(s->T0, s->T0, cpu_tmp0);
             break;
         }
     }
@@ -1659,12 +1660,12 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
            since we've computed the flags into CC_SRC, these variables are
            currently dead.  */
         if (is_right) {
-            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
-            tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
+            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
+            tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
             tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
         } else {
-            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
-            tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
+            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
+            tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
         }
         tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
         tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
@@ -1681,24 +1682,24 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 
     /* load */
     if (op1 == OR_TMP0)
-        gen_op_ld_v(s, ot, cpu_T0, s->A0);
+        gen_op_ld_v(s, ot, s->T0, s->A0);
     else
-        gen_op_mov_v_reg(ot, cpu_T0, op1);
+        gen_op_mov_v_reg(ot, s->T0, op1);
     
     if (is_right) {
         switch (ot) {
         case MO_8:
-            gen_helper_rcrb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rcrb(s->T0, cpu_env, s->T0, cpu_T1);
             break;
         case MO_16:
-            gen_helper_rcrw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rcrw(s->T0, cpu_env, s->T0, cpu_T1);
             break;
         case MO_32:
-            gen_helper_rcrl(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rcrl(s->T0, cpu_env, s->T0, cpu_T1);
             break;
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_helper_rcrq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rcrq(s->T0, cpu_env, s->T0, cpu_T1);
             break;
 #endif
         default:
@@ -1707,17 +1708,17 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
     } else {
         switch (ot) {
         case MO_8:
-            gen_helper_rclb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rclb(s->T0, cpu_env, s->T0, cpu_T1);
             break;
         case MO_16:
-            gen_helper_rclw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rclw(s->T0, cpu_env, s->T0, cpu_T1);
             break;
         case MO_32:
-            gen_helper_rcll(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rcll(s->T0, cpu_env, s->T0, cpu_T1);
             break;
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_helper_rclq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rclq(s->T0, cpu_env, s->T0, cpu_T1);
             break;
 #endif
         default:
@@ -1737,9 +1738,9 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, s->A0);
+        gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, cpu_T0, op1);
+        gen_op_mov_v_reg(ot, s->T0, op1);
     }
 
     count = tcg_temp_new();
@@ -1751,11 +1752,11 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
            This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
            portion by constructing it as a 32-bit value.  */
         if (is_right) {
-            tcg_gen_deposit_tl(cpu_tmp0, cpu_T0, cpu_T1, 16, 16);
-            tcg_gen_mov_tl(cpu_T1, cpu_T0);
-            tcg_gen_mov_tl(cpu_T0, cpu_tmp0);
+            tcg_gen_deposit_tl(cpu_tmp0, s->T0, cpu_T1, 16, 16);
+            tcg_gen_mov_tl(cpu_T1, s->T0);
+            tcg_gen_mov_tl(s->T0, cpu_tmp0);
         } else {
-            tcg_gen_deposit_tl(cpu_T1, cpu_T0, cpu_T1, 16, 16);
+            tcg_gen_deposit_tl(cpu_T1, s->T0, cpu_T1, 16, 16);
         }
         /* FALLTHRU */
 #ifdef TARGET_X86_64
@@ -1763,28 +1764,28 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
         tcg_gen_subi_tl(cpu_tmp0, count, 1);
         if (is_right) {
-            tcg_gen_concat_tl_i64(cpu_T0, cpu_T0, cpu_T1);
-            tcg_gen_shr_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
-            tcg_gen_shr_i64(cpu_T0, cpu_T0, count);
+            tcg_gen_concat_tl_i64(s->T0, s->T0, cpu_T1);
+            tcg_gen_shr_i64(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_shr_i64(s->T0, s->T0, count);
         } else {
-            tcg_gen_concat_tl_i64(cpu_T0, cpu_T1, cpu_T0);
-            tcg_gen_shl_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
-            tcg_gen_shl_i64(cpu_T0, cpu_T0, count);
+            tcg_gen_concat_tl_i64(s->T0, cpu_T1, s->T0);
+            tcg_gen_shl_i64(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_shl_i64(s->T0, s->T0, count);
             tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
-            tcg_gen_shri_i64(cpu_T0, cpu_T0, 32);
+            tcg_gen_shri_i64(s->T0, s->T0, 32);
         }
         break;
 #endif
     default:
         tcg_gen_subi_tl(cpu_tmp0, count, 1);
         if (is_right) {
-            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
+            tcg_gen_shr_tl(cpu_tmp0, s->T0, cpu_tmp0);
 
             tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
-            tcg_gen_shr_tl(cpu_T0, cpu_T0, count);
+            tcg_gen_shr_tl(s->T0, s->T0, count);
             tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
         } else {
-            tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
+            tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
             if (ot == MO_16) {
                 /* Only needed if count > 16, for Intel behaviour.  */
                 tcg_gen_subfi_tl(cpu_tmp4, 33, count);
@@ -1793,20 +1794,20 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
             }
 
             tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
-            tcg_gen_shl_tl(cpu_T0, cpu_T0, count);
+            tcg_gen_shl_tl(s->T0, s->T0, count);
             tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
         }
         tcg_gen_movi_tl(cpu_tmp4, 0);
         tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
                            cpu_tmp4, cpu_T1);
-        tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
+        tcg_gen_or_tl(s->T0, s->T0, cpu_T1);
         break;
     }
 
     /* store */
     gen_op_st_rm_T0_A0(s, ot, op1);
 
-    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, count, is_right);
+    gen_shift_flags(s, ot, s->T0, cpu_tmp0, count, is_right);
     tcg_temp_free(count);
 }
 
@@ -2126,23 +2127,23 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
     if (mod == 3) {
         if (is_store) {
             if (reg != OR_TMP0)
-                gen_op_mov_v_reg(ot, cpu_T0, reg);
-            gen_op_mov_reg_v(ot, rm, cpu_T0);
+                gen_op_mov_v_reg(ot, s->T0, reg);
+            gen_op_mov_reg_v(ot, rm, s->T0);
         } else {
-            gen_op_mov_v_reg(ot, cpu_T0, rm);
+            gen_op_mov_v_reg(ot, s->T0, rm);
             if (reg != OR_TMP0)
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
+                gen_op_mov_reg_v(ot, reg, s->T0);
         }
     } else {
         gen_lea_modrm(env, s, modrm);
         if (is_store) {
             if (reg != OR_TMP0)
-                gen_op_mov_v_reg(ot, cpu_T0, reg);
-            gen_op_st_v(s, ot, cpu_T0, s->A0);
+                gen_op_mov_v_reg(ot, s->T0, reg);
+            gen_op_st_v(s, ot, s->T0, s->A0);
         } else {
-            gen_op_ld_v(s, ot, cpu_T0, s->A0);
+            gen_op_ld_v(s, ot, s->T0, s->A0);
             if (reg != OR_TMP0)
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
+                gen_op_mov_reg_v(ot, reg, s->T0);
         }
     }
 }
@@ -2251,9 +2252,9 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
         cc.reg2 = tcg_const_tl(cc.imm);
     }
 
-    tcg_gen_movcond_tl(cc.cond, cpu_T0, cc.reg, cc.reg2,
-                       cpu_T0, cpu_regs[reg]);
-    gen_op_mov_reg_v(ot, reg, cpu_T0);
+    tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
+                       s->T0, cpu_regs[reg]);
+    gen_op_mov_reg_v(ot, reg, s->T0);
 
     if (cc.mask != -1) {
         tcg_temp_free(cc.reg);
@@ -2263,18 +2264,18 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
     }
 }
 
-static inline void gen_op_movl_T0_seg(int seg_reg)
+static inline void gen_op_movl_T0_seg(DisasContext *s, int seg_reg)
 {
-    tcg_gen_ld32u_tl(cpu_T0, cpu_env,
+    tcg_gen_ld32u_tl(s->T0, cpu_env,
                      offsetof(CPUX86State,segs[seg_reg].selector));
 }
 
-static inline void gen_op_movl_seg_T0_vm(int seg_reg)
+static inline void gen_op_movl_seg_T0_vm(DisasContext *s, int seg_reg)
 {
-    tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
-    tcg_gen_st32_tl(cpu_T0, cpu_env,
+    tcg_gen_ext16u_tl(s->T0, s->T0);
+    tcg_gen_st32_tl(s->T0, cpu_env,
                     offsetof(CPUX86State,segs[seg_reg].selector));
-    tcg_gen_shli_tl(cpu_seg_base[seg_reg], cpu_T0, 4);
+    tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
 }
 
 /* move T0 to seg_reg and compute if the CPU state may change. Never
@@ -2282,7 +2283,7 @@ static inline void gen_op_movl_seg_T0_vm(int seg_reg)
 static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
 {
     if (s->pe && !s->vm86) {
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
         gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
         /* abort translation because the addseg value may change or
            because ss32 may change. For R_SS, translation must always
@@ -2292,7 +2293,7 @@ static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
             s->base.is_jmp = DISAS_TOO_MANY;
         }
     } else {
-        gen_op_movl_seg_T0_vm(seg_reg);
+        gen_op_movl_seg_T0_vm(s, seg_reg);
         if (seg_reg == R_SS) {
             s->base.is_jmp = DISAS_TOO_MANY;
         }
@@ -2356,7 +2357,7 @@ static TCGMemOp gen_pop_T0(DisasContext *s)
     TCGMemOp d_ot = mo_pushpop(s, s->dflag);
 
     gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
-    gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
+    gen_op_ld_v(s, d_ot, s->T0, s->A0);
 
     return d_ot;
 }
@@ -2401,8 +2402,8 @@ static void gen_popa(DisasContext *s)
         }
         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
-        gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
-        gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
+        gen_op_ld_v(s, d_ot, s->T0, s->A0);
+        gen_op_mov_reg_v(d_ot, 7 - i, s->T0);
     }
 
     gen_stack_update(s, 8 * size);
@@ -2454,11 +2455,11 @@ static void gen_leave(DisasContext *s)
     TCGMemOp a_ot = mo_stacksize(s);
 
     gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
-    gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
+    gen_op_ld_v(s, d_ot, s->T0, s->A0);
 
     tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
 
-    gen_op_mov_reg_v(d_ot, R_EBP, cpu_T0);
+    gen_op_mov_reg_v(d_ot, R_EBP, s->T0);
     gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
 }
 
@@ -3126,23 +3127,24 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_Q(0)));
             } else {
-                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                     xmm_regs[reg].ZMM_L(0)));
-                gen_op_st_v(s, MO_32, cpu_T0, s->A0);
+                gen_op_st_v(s, MO_32, s->T0, s->A0);
             }
             break;
         case 0x6e: /* movd mm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
-                tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
+                tcg_gen_st_tl(s->T0, cpu_env,
+                              offsetof(CPUX86State, fpregs[reg].mmx));
             } else
 #endif
             {
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                                  offsetof(CPUX86State,fpregs[reg].mmx));
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
             }
             break;
@@ -3152,14 +3154,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                                  offsetof(CPUX86State,xmm_regs[reg]));
-                gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T0);
+                gen_helper_movq_mm_T0_xmm(cpu_ptr0, s->T0);
             } else
 #endif
             {
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                                  offsetof(CPUX86State,xmm_regs[reg]));
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
             }
             break;
@@ -3193,12 +3195,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x210: /* movss xmm, ea */
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
-                tcg_gen_movi_tl(cpu_T0, 0);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
+                gen_op_ld_v(s, MO_32, s->T0, s->A0);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
+                tcg_gen_movi_tl(s->T0, 0);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
@@ -3210,9 +3216,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_lea_modrm(env, s, modrm);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_Q(0)));
-                tcg_gen_movi_tl(cpu_T0, 0);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
+                tcg_gen_movi_tl(s->T0, 0);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
@@ -3314,13 +3322,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x7e: /* movd ea, mm */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                tcg_gen_ld_i64(cpu_T0, cpu_env,
+                tcg_gen_ld_i64(s->T0, cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx));
                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
+                tcg_gen_ld32u_tl(s->T0, cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
             }
@@ -3328,13 +3336,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x17e: /* movd ea, xmm */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                tcg_gen_ld_i64(cpu_T0, cpu_env,
+                tcg_gen_ld_i64(s->T0, cpu_env,
                                offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
+                tcg_gen_ld32u_tl(s->T0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
             }
@@ -3379,8 +3387,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x211: /* movss ea, xmm */
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
-                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
-                gen_op_st_v(s, MO_32, cpu_T0, s->A0);
+                tcg_gen_ld32u_tl(s->T0, cpu_env,
+                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
+                gen_op_st_v(s, MO_32, s->T0, s->A0);
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
@@ -3429,16 +3438,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             }
             val = x86_ldub_code(env, s);
             if (is_xmm) {
-                tcg_gen_movi_tl(cpu_T0, val);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
-                tcg_gen_movi_tl(cpu_T0, 0);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(1)));
+                tcg_gen_movi_tl(s->T0, val);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
+                tcg_gen_movi_tl(s->T0, 0);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
                 op1_offset = offsetof(CPUX86State,xmm_t0);
             } else {
-                tcg_gen_movi_tl(cpu_T0, val);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
-                tcg_gen_movi_tl(cpu_T0, 0);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
+                tcg_gen_movi_tl(s->T0, val);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, mmx_t0.MMX_L(0)));
+                tcg_gen_movi_tl(s->T0, 0);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, mmx_t0.MMX_L(1)));
                 op1_offset = offsetof(CPUX86State,mmx_t0);
             }
             sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
@@ -3503,12 +3516,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             if (ot == MO_32) {
                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
             } else {
 #ifdef TARGET_X86_64
                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
-                sse_fn_epl(cpu_env, cpu_ptr0, cpu_T0);
+                sse_fn_epl(cpu_env, cpu_ptr0, s->T0);
 #else
                 goto illegal_op;
 #endif
@@ -3555,8 +3568,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 if ((b >> 8) & 1) {
                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
                 } else {
-                    gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
-                    tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
+                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
+                    tcg_gen_st32_tl(s->T0, cpu_env,
+                                    offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
                 }
                 op2_offset = offsetof(CPUX86State,xmm_t0);
             } else {
@@ -3568,17 +3582,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 SSEFunc_i_ep sse_fn_i_ep =
                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
                 sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
-                tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
+                tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
             } else {
 #ifdef TARGET_X86_64
                 SSEFunc_l_ep sse_fn_l_ep =
                     sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
-                sse_fn_l_ep(cpu_T0, cpu_env, cpu_ptr0);
+                sse_fn_l_ep(s->T0, cpu_env, cpu_ptr0);
 #else
                 goto illegal_op;
 #endif
             }
-            gen_op_mov_reg_v(ot, reg, cpu_T0);
+            gen_op_mov_reg_v(ot, reg, s->T0);
             break;
         case 0xc4: /* pinsrw */
         case 0x1c4:
@@ -3587,11 +3601,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             val = x86_ldub_code(env, s);
             if (b1) {
                 val &= 7;
-                tcg_gen_st16_tl(cpu_T0, cpu_env,
+                tcg_gen_st16_tl(s->T0, cpu_env,
                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
             } else {
                 val &= 3;
-                tcg_gen_st16_tl(cpu_T0, cpu_env,
+                tcg_gen_st16_tl(s->T0, cpu_env,
                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
             }
             break;
@@ -3604,16 +3618,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (b1) {
                 val &= 7;
                 rm = (modrm & 7) | REX_B(s);
-                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
+                tcg_gen_ld16u_tl(s->T0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
             } else {
                 val &= 3;
                 rm = (modrm & 7);
-                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
+                tcg_gen_ld16u_tl(s->T0, cpu_env,
                                 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
             }
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_op_mov_reg_v(ot, reg, cpu_T0);
+            gen_op_mov_reg_v(ot, reg, s->T0);
             break;
         case 0x1d6: /* movq ea, xmm */
             if (mod != 3) {
@@ -3760,11 +3774,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
 
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-                gen_helper_crc32(cpu_T0, cpu_tmp2_i32,
-                                 cpu_T0, tcg_const_i32(8 << ot));
+                gen_helper_crc32(s->T0, cpu_tmp2_i32,
+                                 s->T0, tcg_const_i32(8 << ot));
 
                 ot = mo_64_32(s->dflag);
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
+                gen_op_mov_reg_v(ot, reg, s->T0);
                 break;
 
             case 0x1f0: /* crc32 or movbe */
@@ -3789,9 +3803,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
 
                 gen_lea_modrm(env, s, modrm);
                 if ((b & 1) == 0) {
-                    tcg_gen_qemu_ld_tl(cpu_T0, s->A0,
+                    tcg_gen_qemu_ld_tl(s->T0, s->A0,
                                        s->mem_index, ot | MO_BE);
-                    gen_op_mov_reg_v(ot, reg, cpu_T0);
+                    gen_op_mov_reg_v(ot, reg, s->T0);
                 } else {
                     tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
                                        s->mem_index, ot | MO_BE);
@@ -3806,9 +3820,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_regs[s->vex_v]);
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
-                gen_op_update1_cc();
+                tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
+                gen_op_mov_reg_v(ot, reg, s->T0);
+                gen_op_update1_cc(s);
                 set_cc_op(s, CC_OP_LOGICB + ot);
                 break;
 
@@ -3826,12 +3840,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     /* Extract START, and shift the operand.
                        Shifts larger than operand size get zeros.  */
                     tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
-                    tcg_gen_shr_tl(cpu_T0, cpu_T0, s->A0);
+                    tcg_gen_shr_tl(s->T0, s->T0, s->A0);
 
                     bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                     zero = tcg_const_tl(0);
-                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, s->A0, bound,
-                                       cpu_T0, zero);
+                    tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
+                                       s->T0, zero);
                     tcg_temp_free(zero);
 
                     /* Extract the LEN into a mask.  Lengths larger than
@@ -3843,10 +3857,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_movi_tl(cpu_T1, 1);
                     tcg_gen_shl_tl(cpu_T1, cpu_T1, s->A0);
                     tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
-                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
 
-                    gen_op_mov_reg_v(ot, reg, cpu_T0);
-                    gen_op_update1_cc();
+                    gen_op_mov_reg_v(ot, reg, s->T0);
+                    gen_op_update1_cc(s);
                     set_cc_op(s, CC_OP_LOGICB + ot);
                 }
                 break;
@@ -3872,9 +3886,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 tcg_gen_movi_tl(s->A0, -1);
                 tcg_gen_shl_tl(s->A0, s->A0, cpu_T1);
-                tcg_gen_andc_tl(cpu_T0, cpu_T0, s->A0);
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
-                gen_op_update1_cc();
+                tcg_gen_andc_tl(s->T0, s->T0, s->A0);
+                gen_op_mov_reg_v(ot, reg, s->T0);
+                gen_op_update1_cc(s);
                 set_cc_op(s, CC_OP_BMILGB + ot);
                 break;
 
@@ -3888,7 +3902,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 switch (ot) {
                 default:
-                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                     tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
                     tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
                                       cpu_tmp2_i32, cpu_tmp3_i32);
@@ -3897,9 +3911,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     break;
 #ifdef TARGET_X86_64
                 case MO_64:
-                    tcg_gen_mulu2_i64(cpu_T0, cpu_T1,
-                                      cpu_T0, cpu_regs[R_EDX]);
-                    tcg_gen_mov_i64(cpu_regs[s->vex_v], cpu_T0);
+                    tcg_gen_mulu2_i64(s->T0, cpu_T1,
+                                      s->T0, cpu_regs[R_EDX]);
+                    tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
                     tcg_gen_mov_i64(cpu_regs[reg], cpu_T1);
                     break;
 #endif
@@ -3921,7 +3935,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 } else {
                     tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
                 }
-                gen_helper_pdep(cpu_regs[reg], cpu_T0, cpu_T1);
+                gen_helper_pdep(cpu_regs[reg], s->T0, cpu_T1);
                 break;
 
             case 0x2f5: /* pext Gy, By, Ey */
@@ -3939,7 +3953,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 } else {
                     tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
                 }
-                gen_helper_pext(cpu_regs[reg], cpu_T0, cpu_T1);
+                gen_helper_pext(cpu_regs[reg], s->T0, cpu_T1);
                 break;
 
             case 0x1f6: /* adcx Gy, Ey */
@@ -3997,22 +4011,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         /* If we know TL is 64-bit, and we want a 32-bit
                            result, just do everything in 64-bit arithmetic.  */
                         tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
-                        tcg_gen_ext32u_i64(cpu_T0, cpu_T0);
-                        tcg_gen_add_i64(cpu_T0, cpu_T0, cpu_regs[reg]);
-                        tcg_gen_add_i64(cpu_T0, cpu_T0, carry_in);
-                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T0);
-                        tcg_gen_shri_i64(carry_out, cpu_T0, 32);
+                        tcg_gen_ext32u_i64(s->T0, s->T0);
+                        tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
+                        tcg_gen_add_i64(s->T0, s->T0, carry_in);
+                        tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
+                        tcg_gen_shri_i64(carry_out, s->T0, 32);
                         break;
 #endif
                     default:
                         /* Otherwise compute the carry-out in two steps.  */
                         zero = tcg_const_tl(0);
-                        tcg_gen_add2_tl(cpu_T0, carry_out,
-                                        cpu_T0, zero,
+                        tcg_gen_add2_tl(s->T0, carry_out,
+                                        s->T0, zero,
                                         carry_in, zero);
                         tcg_gen_add2_tl(cpu_regs[reg], carry_out,
                                         cpu_regs[reg], carry_out,
-                                        cpu_T0, zero);
+                                        s->T0, zero);
                         tcg_temp_free(zero);
                         break;
                     }
@@ -4036,19 +4050,19 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 31);
                 }
                 if (b == 0x1f7) {
-                    tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_shl_tl(s->T0, s->T0, cpu_T1);
                 } else if (b == 0x2f7) {
                     if (ot != MO_64) {
-                        tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
+                        tcg_gen_ext32s_tl(s->T0, s->T0);
                     }
-                    tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_sar_tl(s->T0, s->T0, cpu_T1);
                 } else {
                     if (ot != MO_64) {
-                        tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
+                        tcg_gen_ext32u_tl(s->T0, s->T0);
                     }
-                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_shr_tl(s->T0, s->T0, cpu_T1);
                 }
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
+                gen_op_mov_reg_v(ot, reg, s->T0);
                 break;
 
             case 0x0f3:
@@ -4063,25 +4077,25 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
 
-                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
+                tcg_gen_mov_tl(cpu_cc_src, s->T0);
                 switch (reg & 7) {
                 case 1: /* blsr By,Ey */
-                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
-                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_subi_tl(cpu_T1, s->T0, 1);
+                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
                     break;
                 case 2: /* blsmsk By,Ey */
-                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
-                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_subi_tl(cpu_T1, s->T0, 1);
+                    tcg_gen_xor_tl(s->T0, s->T0, cpu_T1);
                     break;
                 case 3: /* blsi By, Ey */
-                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
-                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_neg_tl(cpu_T1, s->T0);
+                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
                     break;
                 default:
                     goto unknown_op;
                 }
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
+                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+                gen_op_mov_reg_v(ot, s->vex_v, s->T0);
                 set_cc_op(s, CC_OP_BMILGB + ot);
                 break;
 
@@ -4119,22 +4133,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 val = x86_ldub_code(env, s);
                 switch (b) {
                 case 0x14: /* pextrb */
-                    tcg_gen_ld8u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                    tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].ZMM_B(val & 15)));
                     if (mod == 3) {
-                        gen_op_mov_reg_v(ot, rm, cpu_T0);
+                        gen_op_mov_reg_v(ot, rm, s->T0);
                     } else {
-                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
+                        tcg_gen_qemu_st_tl(s->T0, s->A0,
                                            s->mem_index, MO_UB);
                     }
                     break;
                 case 0x15: /* pextrw */
-                    tcg_gen_ld16u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                    tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].ZMM_W(val & 7)));
                     if (mod == 3) {
-                        gen_op_mov_reg_v(ot, rm, cpu_T0);
+                        gen_op_mov_reg_v(ot, rm, s->T0);
                     } else {
-                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
+                        tcg_gen_qemu_st_tl(s->T0, s->A0,
                                            s->mem_index, MO_LEUW);
                     }
                     break;
@@ -4166,23 +4180,23 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     }
                     break;
                 case 0x17: /* extractps */
-                    tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                    tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].ZMM_L(val & 3)));
                     if (mod == 3) {
-                        gen_op_mov_reg_v(ot, rm, cpu_T0);
+                        gen_op_mov_reg_v(ot, rm, s->T0);
                     } else {
-                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
+                        tcg_gen_qemu_st_tl(s->T0, s->A0,
                                            s->mem_index, MO_LEUL);
                     }
                     break;
                 case 0x20: /* pinsrb */
                     if (mod == 3) {
-                        gen_op_mov_v_reg(MO_32, cpu_T0, rm);
+                        gen_op_mov_v_reg(MO_32, s->T0, rm);
                     } else {
-                        tcg_gen_qemu_ld_tl(cpu_T0, s->A0,
+                        tcg_gen_qemu_ld_tl(s->T0, s->A0,
                                            s->mem_index, MO_UB);
                     }
-                    tcg_gen_st8_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                    tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].ZMM_B(val & 15)));
                     break;
                 case 0x21: /* insertps */
@@ -4297,13 +4311,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 b = x86_ldub_code(env, s);
                 if (ot == MO_64) {
-                    tcg_gen_rotri_tl(cpu_T0, cpu_T0, b & 63);
+                    tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
                 } else {
-                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                     tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
-                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
+                    tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
                 }
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
+                gen_op_mov_reg_v(ot, reg, s->T0);
                 break;
 
             default:
@@ -4360,8 +4374,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 switch (sz) {
                 case 2:
                     /* 32 bit access */
-                    gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
-                    tcg_gen_st32_tl(cpu_T0, cpu_env,
+                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
+                    tcg_gen_st32_tl(s->T0, cpu_env,
                                     offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
                     break;
                 case 3:
@@ -4657,8 +4671,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 xor_zero:
                     /* xor reg, reg optimisation */
                     set_cc_op(s, CC_OP_CLR);
-                    tcg_gen_movi_tl(cpu_T0, 0);
-                    gen_op_mov_reg_v(ot, reg, cpu_T0);
+                    tcg_gen_movi_tl(s->T0, 0);
+                    gen_op_mov_reg_v(ot, reg, s->T0);
                     break;
                 } else {
                     opreg = rm;
@@ -4760,17 +4774,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             /* For those below that handle locked memory, don't load here.  */
             if (!(s->prefix & PREFIX_LOCK)
                 || op != 2) {
-                gen_op_ld_v(s, ot, cpu_T0, s->A0);
+                gen_op_ld_v(s, ot, s->T0, s->A0);
             }
         } else {
-            gen_op_mov_v_reg(ot, cpu_T0, rm);
+            gen_op_mov_v_reg(ot, s->T0, rm);
         }
 
         switch(op) {
         case 0: /* test */
             val = insn_get(env, s, ot);
             tcg_gen_movi_tl(cpu_T1, val);
-            gen_op_testl_T0_T1_cc();
+            gen_op_testl_T0_T1_cc(s);
             set_cc_op(s, CC_OP_LOGICB + ot);
             break;
         case 2: /* not */
@@ -4778,15 +4792,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 if (mod == 3) {
                     goto illegal_op;
                 }
-                tcg_gen_movi_tl(cpu_T0, ~0);
-                tcg_gen_atomic_xor_fetch_tl(cpu_T0, s->A0, cpu_T0,
+                tcg_gen_movi_tl(s->T0, ~0);
+                tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
                                             s->mem_index, ot | MO_LE);
             } else {
-                tcg_gen_not_tl(cpu_T0, cpu_T0);
+                tcg_gen_not_tl(s->T0, s->T0);
                 if (mod != 3) {
-                    gen_op_st_v(s, ot, cpu_T0, s->A0);
+                    gen_op_st_v(s, ot, s->T0, s->A0);
                 } else {
-                    gen_op_mov_reg_v(ot, rm, cpu_T0);
+                    gen_op_mov_reg_v(ot, rm, s->T0);
                 }
             }
             break;
@@ -4803,7 +4817,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 label1 = gen_new_label();
 
                 tcg_gen_mov_tl(a0, s->A0);
-                tcg_gen_mov_tl(t0, cpu_T0);
+                tcg_gen_mov_tl(t0, s->T0);
 
                 gen_set_label(label1);
                 t1 = tcg_temp_new();
@@ -4817,14 +4831,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 
                 tcg_temp_free(t2);
                 tcg_temp_free(a0);
-                tcg_gen_mov_tl(cpu_T0, t0);
+                tcg_gen_mov_tl(s->T0, t0);
                 tcg_temp_free(t0);
             } else {
-                tcg_gen_neg_tl(cpu_T0, cpu_T0);
+                tcg_gen_neg_tl(s->T0, s->T0);
                 if (mod != 3) {
-                    gen_op_st_v(s, ot, cpu_T0, s->A0);
+                    gen_op_st_v(s, ot, s->T0, s->A0);
                 } else {
-                    gen_op_mov_reg_v(ot, rm, cpu_T0);
+                    gen_op_mov_reg_v(ot, rm, s->T0);
                 }
             }
             gen_op_update_neg_cc(s);
@@ -4834,31 +4848,31 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             switch(ot) {
             case MO_8:
                 gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
-                tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
+                tcg_gen_ext8u_tl(s->T0, s->T0);
                 tcg_gen_ext8u_tl(cpu_T1, cpu_T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                tcg_gen_andi_tl(cpu_cc_src, cpu_T0, 0xff00);
+                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+                tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
                 set_cc_op(s, CC_OP_MULB);
                 break;
             case MO_16:
                 gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
-                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
+                tcg_gen_ext16u_tl(s->T0, s->T0);
                 tcg_gen_ext16u_tl(cpu_T1, cpu_T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
-                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
-                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
+                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+                tcg_gen_shri_tl(s->T0, s->T0, 16);
+                gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
+                tcg_gen_mov_tl(cpu_cc_src, s->T0);
                 set_cc_op(s, CC_OP_MULW);
                 break;
             default:
             case MO_32:
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
                 tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
                                   cpu_tmp2_i32, cpu_tmp3_i32);
@@ -4871,7 +4885,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 #ifdef TARGET_X86_64
             case MO_64:
                 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
-                                  cpu_T0, cpu_regs[R_EAX]);
+                                  s->T0, cpu_regs[R_EAX]);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
                 set_cc_op(s, CC_OP_MULQ);
@@ -4883,33 +4897,33 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             switch(ot) {
             case MO_8:
                 gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
-                tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
+                tcg_gen_ext8s_tl(s->T0, s->T0);
                 tcg_gen_ext8s_tl(cpu_T1, cpu_T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T0);
-                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
+                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+                tcg_gen_ext8s_tl(cpu_tmp0, s->T0);
+                tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
                 set_cc_op(s, CC_OP_MULB);
                 break;
             case MO_16:
                 gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
-                tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
+                tcg_gen_ext16s_tl(s->T0, s->T0);
                 tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
-                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
-                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
-                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
+                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+                tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
+                tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
+                tcg_gen_shri_tl(s->T0, s->T0, 16);
+                gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
                 set_cc_op(s, CC_OP_MULW);
                 break;
             default:
             case MO_32:
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
                 tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
                                   cpu_tmp2_i32, cpu_tmp3_i32);
@@ -4924,7 +4938,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 #ifdef TARGET_X86_64
             case MO_64:
                 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
-                                  cpu_T0, cpu_regs[R_EAX]);
+                                  s->T0, cpu_regs[R_EAX]);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
                 tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
                 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
@@ -4936,18 +4950,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         case 6: /* div */
             switch(ot) {
             case MO_8:
-                gen_helper_divb_AL(cpu_env, cpu_T0);
+                gen_helper_divb_AL(cpu_env, s->T0);
                 break;
             case MO_16:
-                gen_helper_divw_AX(cpu_env, cpu_T0);
+                gen_helper_divw_AX(cpu_env, s->T0);
                 break;
             default:
             case MO_32:
-                gen_helper_divl_EAX(cpu_env, cpu_T0);
+                gen_helper_divl_EAX(cpu_env, s->T0);
                 break;
 #ifdef TARGET_X86_64
             case MO_64:
-                gen_helper_divq_EAX(cpu_env, cpu_T0);
+                gen_helper_divq_EAX(cpu_env, s->T0);
                 break;
 #endif
             }
@@ -4955,18 +4969,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         case 7: /* idiv */
             switch(ot) {
             case MO_8:
-                gen_helper_idivb_AL(cpu_env, cpu_T0);
+                gen_helper_idivb_AL(cpu_env, s->T0);
                 break;
             case MO_16:
-                gen_helper_idivw_AX(cpu_env, cpu_T0);
+                gen_helper_idivw_AX(cpu_env, s->T0);
                 break;
             default:
             case MO_32:
-                gen_helper_idivl_EAX(cpu_env, cpu_T0);
+                gen_helper_idivl_EAX(cpu_env, s->T0);
                 break;
 #ifdef TARGET_X86_64
             case MO_64:
-                gen_helper_idivq_EAX(cpu_env, cpu_T0);
+                gen_helper_idivq_EAX(cpu_env, s->T0);
                 break;
 #endif
             }
@@ -5001,9 +5015,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (mod != 3) {
             gen_lea_modrm(env, s, modrm);
             if (op >= 2 && op != 3 && op != 5)
-                gen_op_ld_v(s, ot, cpu_T0, s->A0);
+                gen_op_ld_v(s, ot, s->T0, s->A0);
         } else {
-            gen_op_mov_v_reg(ot, cpu_T0, rm);
+            gen_op_mov_v_reg(ot, s->T0, rm);
         }
 
         switch(op) {
@@ -5024,27 +5038,27 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         case 2: /* call Ev */
             /* XXX: optimize if memory (no 'and' is necessary) */
             if (dflag == MO_16) {
-                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
+                tcg_gen_ext16u_tl(s->T0, s->T0);
             }
             next_eip = s->pc - s->cs_base;
             tcg_gen_movi_tl(cpu_T1, next_eip);
             gen_push_v(s, cpu_T1);
-            gen_op_jmp_v(cpu_T0);
+            gen_op_jmp_v(s->T0);
             gen_bnd_jmp(s);
-            gen_jr(s, cpu_T0);
+            gen_jr(s, s->T0);
             break;
         case 3: /* lcall Ev */
             gen_op_ld_v(s, ot, cpu_T1, s->A0);
             gen_add_A0_im(s, 1 << ot);
-            gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
+            gen_op_ld_v(s, MO_16, s->T0, s->A0);
         do_lcall:
             if (s->pe && !s->vm86) {
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
                                            tcg_const_i32(dflag - 1),
                                            tcg_const_tl(s->pc - s->cs_base));
             } else {
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T1,
                                       tcg_const_i32(dflag - 1),
                                       tcg_const_i32(s->pc - s->cs_base));
@@ -5054,30 +5068,30 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             break;
         case 4: /* jmp Ev */
             if (dflag == MO_16) {
-                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
+                tcg_gen_ext16u_tl(s->T0, s->T0);
             }
-            gen_op_jmp_v(cpu_T0);
+            gen_op_jmp_v(s->T0);
             gen_bnd_jmp(s);
-            gen_jr(s, cpu_T0);
+            gen_jr(s, s->T0);
             break;
         case 5: /* ljmp Ev */
             gen_op_ld_v(s, ot, cpu_T1, s->A0);
             gen_add_A0_im(s, 1 << ot);
-            gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
+            gen_op_ld_v(s, MO_16, s->T0, s->A0);
         do_ljmp:
             if (s->pe && !s->vm86) {
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
                                           tcg_const_tl(s->pc - s->cs_base));
             } else {
-                gen_op_movl_seg_T0_vm(R_CS);
+                gen_op_movl_seg_T0_vm(s, R_CS);
                 gen_op_jmp_v(cpu_T1);
             }
             tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
             gen_jr(s, cpu_tmp4);
             break;
         case 6: /* push Ev */
-            gen_push_v(s, cpu_T0);
+            gen_push_v(s, s->T0);
             break;
         default:
             goto unknown_op;
@@ -5093,7 +5107,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
         gen_op_mov_v_reg(ot, cpu_T1, reg);
-        gen_op_testl_T0_T1_cc();
+        gen_op_testl_T0_T1_cc(s);
         set_cc_op(s, CC_OP_LOGICB + ot);
         break;
 
@@ -5102,9 +5116,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         ot = mo_b_d(b, dflag);
         val = insn_get(env, s, ot);
 
-        gen_op_mov_v_reg(ot, cpu_T0, OR_EAX);
+        gen_op_mov_v_reg(ot, s->T0, OR_EAX);
         tcg_gen_movi_tl(cpu_T1, val);
-        gen_op_testl_T0_T1_cc();
+        gen_op_testl_T0_T1_cc(s);
         set_cc_op(s, CC_OP_LOGICB + ot);
         break;
 
@@ -5112,20 +5126,20 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         switch (dflag) {
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
-            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
-            gen_op_mov_reg_v(MO_64, R_EAX, cpu_T0);
+            gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
+            tcg_gen_ext32s_tl(s->T0, s->T0);
+            gen_op_mov_reg_v(MO_64, R_EAX, s->T0);
             break;
 #endif
         case MO_32:
-            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
-            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
-            gen_op_mov_reg_v(MO_32, R_EAX, cpu_T0);
+            gen_op_mov_v_reg(MO_16, s->T0, R_EAX);
+            tcg_gen_ext16s_tl(s->T0, s->T0);
+            gen_op_mov_reg_v(MO_32, R_EAX, s->T0);
             break;
         case MO_16:
-            gen_op_mov_v_reg(MO_8, cpu_T0, R_EAX);
-            tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
-            gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
+            gen_op_mov_v_reg(MO_8, s->T0, R_EAX);
+            tcg_gen_ext8s_tl(s->T0, s->T0);
+            gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
             break;
         default:
             tcg_abort();
@@ -5135,22 +5149,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         switch (dflag) {
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_op_mov_v_reg(MO_64, cpu_T0, R_EAX);
-            tcg_gen_sari_tl(cpu_T0, cpu_T0, 63);
-            gen_op_mov_reg_v(MO_64, R_EDX, cpu_T0);
+            gen_op_mov_v_reg(MO_64, s->T0, R_EAX);
+            tcg_gen_sari_tl(s->T0, s->T0, 63);
+            gen_op_mov_reg_v(MO_64, R_EDX, s->T0);
             break;
 #endif
         case MO_32:
-            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
-            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
-            tcg_gen_sari_tl(cpu_T0, cpu_T0, 31);
-            gen_op_mov_reg_v(MO_32, R_EDX, cpu_T0);
+            gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
+            tcg_gen_ext32s_tl(s->T0, s->T0);
+            tcg_gen_sari_tl(s->T0, s->T0, 31);
+            gen_op_mov_reg_v(MO_32, R_EDX, s->T0);
             break;
         case MO_16:
-            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
-            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
-            tcg_gen_sari_tl(cpu_T0, cpu_T0, 15);
-            gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
+            gen_op_mov_v_reg(MO_16, s->T0, R_EAX);
+            tcg_gen_ext16s_tl(s->T0, s->T0);
+            tcg_gen_sari_tl(s->T0, s->T0, 15);
+            gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
             break;
         default:
             tcg_abort();
@@ -5179,14 +5193,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         switch (ot) {
 #ifdef TARGET_X86_64
         case MO_64:
-            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, cpu_T0, cpu_T1);
+            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, s->T0, cpu_T1);
             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
             tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
             tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T1);
             break;
 #endif
         case MO_32:
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
             tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
             tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
                               cpu_tmp2_i32, cpu_tmp3_i32);
@@ -5197,14 +5211,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
             break;
         default:
-            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
+            tcg_gen_ext16s_tl(s->T0, s->T0);
             tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
             /* XXX: use 32 bit mul which could be faster */
-            tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
-            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
-            tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
-            gen_op_mov_reg_v(ot, reg, cpu_T0);
+            tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+            tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
+            tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
+            gen_op_mov_reg_v(ot, reg, s->T0);
             break;
         }
         set_cc_op(s, CC_OP_MULB + ot);
@@ -5215,27 +5229,27 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         modrm = x86_ldub_code(env, s);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
-        gen_op_mov_v_reg(ot, cpu_T0, reg);
+        gen_op_mov_v_reg(ot, s->T0, reg);
         if (mod == 3) {
             rm = (modrm & 7) | REX_B(s);
             gen_op_mov_v_reg(ot, cpu_T1, rm);
-            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
             gen_op_mov_reg_v(ot, reg, cpu_T1);
-            gen_op_mov_reg_v(ot, rm, cpu_T0);
+            gen_op_mov_reg_v(ot, rm, s->T0);
         } else {
             gen_lea_modrm(env, s, modrm);
             if (s->prefix & PREFIX_LOCK) {
-                tcg_gen_atomic_fetch_add_tl(cpu_T1, s->A0, cpu_T0,
+                tcg_gen_atomic_fetch_add_tl(cpu_T1, s->A0, s->T0,
                                             s->mem_index, ot | MO_LE);
-                tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
+                tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
             } else {
                 gen_op_ld_v(s, ot, cpu_T1, s->A0);
-                tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
-                gen_op_st_v(s, ot, cpu_T0, s->A0);
+                tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
+                gen_op_st_v(s, ot, s->T0, s->A0);
             }
             gen_op_mov_reg_v(ot, reg, cpu_T1);
         }
-        gen_op_update2_cc();
+        gen_op_update2_cc(s);
         set_cc_op(s, CC_OP_ADDB + ot);
         break;
     case 0x1b0:
@@ -5328,14 +5342,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         /**************************/
         /* push/pop */
     case 0x50 ... 0x57: /* push */
-        gen_op_mov_v_reg(MO_32, cpu_T0, (b & 7) | REX_B(s));
-        gen_push_v(s, cpu_T0);
+        gen_op_mov_v_reg(MO_32, s->T0, (b & 7) | REX_B(s));
+        gen_push_v(s, s->T0);
         break;
     case 0x58 ... 0x5f: /* pop */
         ot = gen_pop_T0(s);
         /* NOTE: order is important for pop %sp */
         gen_pop_update(s, ot);
-        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), cpu_T0);
+        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), s->T0);
         break;
     case 0x60: /* pusha */
         if (CODE64(s))
@@ -5354,8 +5368,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             val = insn_get(env, s, ot);
         else
             val = (int8_t)insn_get(env, s, MO_8);
-        tcg_gen_movi_tl(cpu_T0, val);
-        gen_push_v(s, cpu_T0);
+        tcg_gen_movi_tl(s->T0, val);
+        gen_push_v(s, s->T0);
         break;
     case 0x8f: /* pop Ev */
         modrm = x86_ldub_code(env, s);
@@ -5365,7 +5379,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             /* NOTE: order is important for pop %sp */
             gen_pop_update(s, ot);
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_reg_v(ot, rm, cpu_T0);
+            gen_op_mov_reg_v(ot, rm, s->T0);
         } else {
             /* NOTE: order is important too for MMU exceptions */
             s->popl_esp_hack = 1 << ot;
@@ -5391,13 +5405,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x1e: /* push ds */
         if (CODE64(s))
             goto illegal_op;
-        gen_op_movl_T0_seg(b >> 3);
-        gen_push_v(s, cpu_T0);
+        gen_op_movl_T0_seg(s, b >> 3);
+        gen_push_v(s, s->T0);
         break;
     case 0x1a0: /* push fs */
     case 0x1a8: /* push gs */
-        gen_op_movl_T0_seg((b >> 3) & 7);
-        gen_push_v(s, cpu_T0);
+        gen_op_movl_T0_seg(s, (b >> 3) & 7);
+        gen_push_v(s, s->T0);
         break;
     case 0x07: /* pop es */
     case 0x17: /* pop ss */
@@ -5451,11 +5465,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_lea_modrm(env, s, modrm);
         }
         val = insn_get(env, s, ot);
-        tcg_gen_movi_tl(cpu_T0, val);
+        tcg_gen_movi_tl(s->T0, val);
         if (mod != 3) {
-            gen_op_st_v(s, ot, cpu_T0, s->A0);
+            gen_op_st_v(s, ot, s->T0, s->A0);
         } else {
-            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T0);
+            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), s->T0);
         }
         break;
     case 0x8a:
@@ -5465,7 +5479,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         reg = ((modrm >> 3) & 7) | rex_r;
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_op_mov_reg_v(ot, reg, cpu_T0);
+        gen_op_mov_reg_v(ot, reg, s->T0);
         break;
     case 0x8e: /* mov seg, Gv */
         modrm = x86_ldub_code(env, s);
@@ -5491,7 +5505,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         mod = (modrm >> 6) & 3;
         if (reg >= 6)
             goto illegal_op;
-        gen_op_movl_T0_seg(reg);
+        gen_op_movl_T0_seg(s, reg);
         ot = mod == 3 ? dflag : MO_16;
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
         break;
@@ -5518,30 +5532,30 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 
             if (mod == 3) {
                 if (s_ot == MO_SB && byte_reg_is_xH(rm)) {
-                    tcg_gen_sextract_tl(cpu_T0, cpu_regs[rm - 4], 8, 8);
+                    tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
                 } else {
-                    gen_op_mov_v_reg(ot, cpu_T0, rm);
+                    gen_op_mov_v_reg(ot, s->T0, rm);
                     switch (s_ot) {
                     case MO_UB:
-                        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
+                        tcg_gen_ext8u_tl(s->T0, s->T0);
                         break;
                     case MO_SB:
-                        tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
+                        tcg_gen_ext8s_tl(s->T0, s->T0);
                         break;
                     case MO_UW:
-                        tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
+                        tcg_gen_ext16u_tl(s->T0, s->T0);
                         break;
                     default:
                     case MO_SW:
-                        tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
+                        tcg_gen_ext16s_tl(s->T0, s->T0);
                         break;
                     }
                 }
-                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
+                gen_op_mov_reg_v(d_ot, reg, s->T0);
             } else {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, s_ot, cpu_T0, s->A0);
-                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
+                gen_op_ld_v(s, s_ot, s->T0, s->A0);
+                gen_op_mov_reg_v(d_ot, reg, s->T0);
             }
         }
         break;
@@ -5581,27 +5595,27 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             tcg_gen_movi_tl(s->A0, offset_addr);
             gen_add_A0_ds_seg(s);
             if ((b & 2) == 0) {
-                gen_op_ld_v(s, ot, cpu_T0, s->A0);
-                gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
+                gen_op_ld_v(s, ot, s->T0, s->A0);
+                gen_op_mov_reg_v(ot, R_EAX, s->T0);
             } else {
-                gen_op_mov_v_reg(ot, cpu_T0, R_EAX);
-                gen_op_st_v(s, ot, cpu_T0, s->A0);
+                gen_op_mov_v_reg(ot, s->T0, R_EAX);
+                gen_op_st_v(s, ot, s->T0, s->A0);
             }
         }
         break;
     case 0xd7: /* xlat */
         tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
-        tcg_gen_ext8u_tl(cpu_T0, cpu_regs[R_EAX]);
-        tcg_gen_add_tl(s->A0, s->A0, cpu_T0);
+        tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
+        tcg_gen_add_tl(s->A0, s->A0, s->T0);
         gen_extu(s->aflag, s->A0);
         gen_add_A0_ds_seg(s);
-        gen_op_ld_v(s, MO_8, cpu_T0, s->A0);
-        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
+        gen_op_ld_v(s, MO_8, s->T0, s->A0);
+        gen_op_mov_reg_v(MO_8, R_EAX, s->T0);
         break;
     case 0xb0 ... 0xb7: /* mov R, Ib */
         val = insn_get(env, s, MO_8);
-        tcg_gen_movi_tl(cpu_T0, val);
-        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), cpu_T0);
+        tcg_gen_movi_tl(s->T0, val);
+        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), s->T0);
         break;
     case 0xb8 ... 0xbf: /* mov R, Iv */
 #ifdef TARGET_X86_64
@@ -5610,16 +5624,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             /* 64 bit case */
             tmp = x86_ldq_code(env, s);
             reg = (b & 7) | REX_B(s);
-            tcg_gen_movi_tl(cpu_T0, tmp);
-            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
+            tcg_gen_movi_tl(s->T0, tmp);
+            gen_op_mov_reg_v(MO_64, reg, s->T0);
         } else
 #endif
         {
             ot = dflag;
             val = insn_get(env, s, ot);
             reg = (b & 7) | REX_B(s);
-            tcg_gen_movi_tl(cpu_T0, val);
-            gen_op_mov_reg_v(ot, reg, cpu_T0);
+            tcg_gen_movi_tl(s->T0, val);
+            gen_op_mov_reg_v(ot, reg, s->T0);
         }
         break;
 
@@ -5638,15 +5652,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (mod == 3) {
             rm = (modrm & 7) | REX_B(s);
         do_xchg_reg:
-            gen_op_mov_v_reg(ot, cpu_T0, reg);
+            gen_op_mov_v_reg(ot, s->T0, reg);
             gen_op_mov_v_reg(ot, cpu_T1, rm);
-            gen_op_mov_reg_v(ot, rm, cpu_T0);
+            gen_op_mov_reg_v(ot, rm, s->T0);
             gen_op_mov_reg_v(ot, reg, cpu_T1);
         } else {
             gen_lea_modrm(env, s, modrm);
-            gen_op_mov_v_reg(ot, cpu_T0, reg);
+            gen_op_mov_v_reg(ot, s->T0, reg);
             /* for xchg, lock is implicit */
-            tcg_gen_atomic_xchg_tl(cpu_T1, s->A0, cpu_T0,
+            tcg_gen_atomic_xchg_tl(cpu_T1, s->A0, s->T0,
                                    s->mem_index, ot | MO_LE);
             gen_op_mov_reg_v(ot, reg, cpu_T1);
         }
@@ -5678,7 +5692,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_op_ld_v(s, ot, cpu_T1, s->A0);
         gen_add_A0_im(s, 1 << ot);
         /* load the segment first to handle exceptions properly */
-        gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
+        gen_op_ld_v(s, MO_16, s->T0, s->A0);
         gen_movl_seg_T0(s, op);
         /* then put the data */
         gen_op_mov_reg_v(ot, reg, cpu_T1);
@@ -6220,8 +6234,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 switch(rm) {
                 case 0:
                     gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
-                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
-                    gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
+                    tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
+                    gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
                     break;
                 default:
                     goto unknown_op;
@@ -6331,7 +6345,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x6c: /* insS */
     case 0x6d:
         ot = mo_b_d32(b, dflag);
-        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
+        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base, 
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -6346,7 +6360,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x6e: /* outsS */
     case 0x6f:
         ot = mo_b_d32(b, dflag);
-        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
+        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes) | 4);
         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -6366,7 +6380,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0xe5:
         ot = mo_b_d32(b, dflag);
         val = x86_ldub_code(env, s);
-        tcg_gen_movi_tl(cpu_T0, val);
+        tcg_gen_movi_tl(s->T0, val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
@@ -6385,7 +6399,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0xe7:
         ot = mo_b_d32(b, dflag);
         val = x86_ldub_code(env, s);
-        tcg_gen_movi_tl(cpu_T0, val);
+        tcg_gen_movi_tl(s->T0, val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
         gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
@@ -6405,13 +6419,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0xec:
     case 0xed:
         ot = mo_b_d32(b, dflag);
-        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
+        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
         gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
         gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
         gen_bpt_io(s, cpu_tmp2_i32, ot);
@@ -6423,7 +6437,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0xee:
     case 0xef:
         ot = mo_b_d32(b, dflag);
-        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
+        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
         gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
@@ -6431,7 +6445,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
         gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
         gen_bpt_io(s, cpu_tmp2_i32, ot);
@@ -6448,17 +6462,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         ot = gen_pop_T0(s);
         gen_stack_update(s, val + (1 << ot));
         /* Note that gen_pop_T0 uses a zero-extending load.  */
-        gen_op_jmp_v(cpu_T0);
+        gen_op_jmp_v(s->T0);
         gen_bnd_jmp(s);
-        gen_jr(s, cpu_T0);
+        gen_jr(s, s->T0);
         break;
     case 0xc3: /* ret */
         ot = gen_pop_T0(s);
         gen_pop_update(s, ot);
         /* Note that gen_pop_T0 uses a zero-extending load.  */
-        gen_op_jmp_v(cpu_T0);
+        gen_op_jmp_v(s->T0);
         gen_bnd_jmp(s);
-        gen_jr(s, cpu_T0);
+        gen_jr(s, s->T0);
         break;
     case 0xca: /* lret im */
         val = x86_ldsw_code(env, s);
@@ -6471,14 +6485,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         } else {
             gen_stack_A0(s);
             /* pop offset */
-            gen_op_ld_v(s, dflag, cpu_T0, s->A0);
+            gen_op_ld_v(s, dflag, s->T0, s->A0);
             /* NOTE: keeping EIP updated is not a problem in case of
                exception */
-            gen_op_jmp_v(cpu_T0);
+            gen_op_jmp_v(s->T0);
             /* pop selector */
             gen_add_A0_im(s, 1 << dflag);
-            gen_op_ld_v(s, dflag, cpu_T0, s->A0);
-            gen_op_movl_seg_T0_vm(R_CS);
+            gen_op_ld_v(s, dflag, s->T0, s->A0);
+            gen_op_movl_seg_T0_vm(s, R_CS);
             /* add stack offset */
             gen_stack_update(s, val + (2 << dflag));
         }
@@ -6521,8 +6535,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             } else if (!CODE64(s)) {
                 tval &= 0xffffffff;
             }
-            tcg_gen_movi_tl(cpu_T0, next_eip);
-            gen_push_v(s, cpu_T0);
+            tcg_gen_movi_tl(s->T0, next_eip);
+            gen_push_v(s, s->T0);
             gen_bnd_jmp(s);
             gen_jmp(s, tval);
         }
@@ -6537,7 +6551,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             offset = insn_get(env, s, ot);
             selector = insn_get(env, s, MO_16);
 
-            tcg_gen_movi_tl(cpu_T0, selector);
+            tcg_gen_movi_tl(s->T0, selector);
             tcg_gen_movi_tl(cpu_T1, offset);
         }
         goto do_lcall;
@@ -6566,7 +6580,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             offset = insn_get(env, s, ot);
             selector = insn_get(env, s, MO_16);
 
-            tcg_gen_movi_tl(cpu_T0, selector);
+            tcg_gen_movi_tl(s->T0, selector);
             tcg_gen_movi_tl(cpu_T1, offset);
         }
         goto do_ljmp;
@@ -6599,7 +6613,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 
     case 0x190 ... 0x19f: /* setcc Gv */
         modrm = x86_ldub_code(env, s);
-        gen_setcc1(s, b, cpu_T0);
+        gen_setcc1(s, b, s->T0);
         gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
         break;
     case 0x140 ... 0x14f: /* cmov Gv, Ev */
@@ -6620,8 +6634,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
-            gen_helper_read_eflags(cpu_T0, cpu_env);
-            gen_push_v(s, cpu_T0);
+            gen_helper_read_eflags(s->T0, cpu_env);
+            gen_push_v(s, s->T0);
         }
         break;
     case 0x9d: /* popf */
@@ -6632,13 +6646,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             ot = gen_pop_T0(s);
             if (s->cpl == 0) {
                 if (dflag != MO_16) {
-                    gen_helper_write_eflags(cpu_env, cpu_T0,
+                    gen_helper_write_eflags(cpu_env, s->T0,
                                             tcg_const_i32((TF_MASK | AC_MASK |
                                                            ID_MASK | NT_MASK |
                                                            IF_MASK |
                                                            IOPL_MASK)));
                 } else {
-                    gen_helper_write_eflags(cpu_env, cpu_T0,
+                    gen_helper_write_eflags(cpu_env, s->T0,
                                             tcg_const_i32((TF_MASK | AC_MASK |
                                                            ID_MASK | NT_MASK |
                                                            IF_MASK | IOPL_MASK)
@@ -6647,14 +6661,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             } else {
                 if (s->cpl <= s->iopl) {
                     if (dflag != MO_16) {
-                        gen_helper_write_eflags(cpu_env, cpu_T0,
+                        gen_helper_write_eflags(cpu_env, s->T0,
                                                 tcg_const_i32((TF_MASK |
                                                                AC_MASK |
                                                                ID_MASK |
                                                                NT_MASK |
                                                                IF_MASK)));
                     } else {
-                        gen_helper_write_eflags(cpu_env, cpu_T0,
+                        gen_helper_write_eflags(cpu_env, s->T0,
                                                 tcg_const_i32((TF_MASK |
                                                                AC_MASK |
                                                                ID_MASK |
@@ -6664,11 +6678,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     }
                 } else {
                     if (dflag != MO_16) {
-                        gen_helper_write_eflags(cpu_env, cpu_T0,
+                        gen_helper_write_eflags(cpu_env, s->T0,
                                            tcg_const_i32((TF_MASK | AC_MASK |
                                                           ID_MASK | NT_MASK)));
                     } else {
-                        gen_helper_write_eflags(cpu_env, cpu_T0,
+                        gen_helper_write_eflags(cpu_env, s->T0,
                                            tcg_const_i32((TF_MASK | AC_MASK |
                                                           ID_MASK | NT_MASK)
                                                          & 0xffff));
@@ -6685,19 +6699,19 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x9e: /* sahf */
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
             goto illegal_op;
-        gen_op_mov_v_reg(MO_8, cpu_T0, R_AH);
+        gen_op_mov_v_reg(MO_8, s->T0, R_AH);
         gen_compute_eflags(s);
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
-        tcg_gen_andi_tl(cpu_T0, cpu_T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
-        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T0);
+        tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
+        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
         break;
     case 0x9f: /* lahf */
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
             goto illegal_op;
         gen_compute_eflags(s);
         /* Note: gen_compute_eflags() only gives the condition codes */
-        tcg_gen_ori_tl(cpu_T0, cpu_cc_src, 0x02);
-        gen_op_mov_reg_v(MO_8, R_AH, cpu_T0);
+        tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
+        gen_op_mov_reg_v(MO_8, R_AH, s->T0);
         break;
     case 0xf5: /* cmc */
         gen_compute_eflags(s);
@@ -6732,10 +6746,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             s->rip_offset = 1;
             gen_lea_modrm(env, s, modrm);
             if (!(s->prefix & PREFIX_LOCK)) {
-                gen_op_ld_v(s, ot, cpu_T0, s->A0);
+                gen_op_ld_v(s, ot, s->T0, s->A0);
             }
         } else {
-            gen_op_mov_v_reg(ot, cpu_T0, rm);
+            gen_op_mov_v_reg(ot, s->T0, rm);
         }
         /* load shift */
         val = x86_ldub_code(env, s);
@@ -6771,10 +6785,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), cpu_tmp0);
             gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
             if (!(s->prefix & PREFIX_LOCK)) {
-                gen_op_ld_v(s, ot, cpu_T0, s->A0);
+                gen_op_ld_v(s, ot, s->T0, s->A0);
             }
         } else {
-            gen_op_mov_v_reg(ot, cpu_T0, rm);
+            gen_op_mov_v_reg(ot, s->T0, rm);
         }
     bt_op:
         tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
@@ -6785,46 +6799,46 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             case 0: /* bt */
                 /* Needs no atomic ops; we surpressed the normal
                    memory load for LOCK above so do it now.  */
-                gen_op_ld_v(s, ot, cpu_T0, s->A0);
+                gen_op_ld_v(s, ot, s->T0, s->A0);
                 break;
             case 1: /* bts */
-                tcg_gen_atomic_fetch_or_tl(cpu_T0, s->A0, cpu_tmp0,
+                tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, cpu_tmp0,
                                            s->mem_index, ot | MO_LE);
                 break;
             case 2: /* btr */
                 tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
-                tcg_gen_atomic_fetch_and_tl(cpu_T0, s->A0, cpu_tmp0,
+                tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, cpu_tmp0,
                                             s->mem_index, ot | MO_LE);
                 break;
             default:
             case 3: /* btc */
-                tcg_gen_atomic_fetch_xor_tl(cpu_T0, s->A0, cpu_tmp0,
+                tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, cpu_tmp0,
                                             s->mem_index, ot | MO_LE);
                 break;
             }
-            tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
+            tcg_gen_shr_tl(cpu_tmp4, s->T0, cpu_T1);
         } else {
-            tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
+            tcg_gen_shr_tl(cpu_tmp4, s->T0, cpu_T1);
             switch (op) {
             case 0: /* bt */
                 /* Data already loaded; nothing to do.  */
                 break;
             case 1: /* bts */
-                tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
+                tcg_gen_or_tl(s->T0, s->T0, cpu_tmp0);
                 break;
             case 2: /* btr */
-                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0);
+                tcg_gen_andc_tl(s->T0, s->T0, cpu_tmp0);
                 break;
             default:
             case 3: /* btc */
-                tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0);
+                tcg_gen_xor_tl(s->T0, s->T0, cpu_tmp0);
                 break;
             }
             if (op != 0) {
                 if (mod != 3) {
-                    gen_op_st_v(s, ot, cpu_T0, s->A0);
+                    gen_op_st_v(s, ot, s->T0, s->A0);
                 } else {
-                    gen_op_mov_reg_v(ot, rm, cpu_T0);
+                    gen_op_mov_reg_v(ot, rm, s->T0);
                 }
             }
         }
@@ -6865,7 +6879,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         modrm = x86_ldub_code(env, s);
         reg = ((modrm >> 3) & 7) | rex_r;
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_extu(ot, cpu_T0);
+        gen_extu(ot, s->T0);
 
         /* Note that lzcnt and tzcnt are in different extensions.  */
         if ((prefixes & PREFIX_REPZ)
@@ -6874,23 +6888,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
             int size = 8 << ot;
             /* For lzcnt/tzcnt, C bit is defined related to the input. */
-            tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
+            tcg_gen_mov_tl(cpu_cc_src, s->T0);
             if (b & 1) {
                 /* For lzcnt, reduce the target_ulong result by the
                    number of zeros that we expect to find at the top.  */
-                tcg_gen_clzi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS);
-                tcg_gen_subi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - size);
+                tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
+                tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
             } else {
                 /* For tzcnt, a zero input must return the operand size.  */
-                tcg_gen_ctzi_tl(cpu_T0, cpu_T0, size);
+                tcg_gen_ctzi_tl(s->T0, s->T0, size);
             }
             /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
-            gen_op_update1_cc();
+            gen_op_update1_cc(s);
             set_cc_op(s, CC_OP_BMILGB + ot);
         } else {
             /* For bsr/bsf, only the Z bit is defined and it is related
                to the input and not the result.  */
-            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
             set_cc_op(s, CC_OP_LOGICB + ot);
 
             /* ??? The manual says that the output is undefined when the
@@ -6901,13 +6915,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 /* For bsr, return the bit index of the first 1 bit,
                    not the count of leading zeros.  */
                 tcg_gen_xori_tl(cpu_T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
-                tcg_gen_clz_tl(cpu_T0, cpu_T0, cpu_T1);
-                tcg_gen_xori_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - 1);
+                tcg_gen_clz_tl(s->T0, s->T0, cpu_T1);
+                tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
             } else {
-                tcg_gen_ctz_tl(cpu_T0, cpu_T0, cpu_regs[reg]);
+                tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
             }
         }
-        gen_op_mov_reg_v(ot, reg, cpu_T0);
+        gen_op_mov_reg_v(ot, reg, s->T0);
         break;
         /************************/
         /* bcd */
@@ -7047,9 +7061,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_op_mov_v_reg(ot, cpu_T0, reg);
+        gen_op_mov_v_reg(ot, s->T0, reg);
         gen_lea_modrm(env, s, modrm);
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
         if (ot == MO_16) {
             gen_helper_boundw(cpu_env, s->A0, cpu_tmp2_i32);
         } else {
@@ -7060,24 +7074,24 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         reg = (b & 7) | REX_B(s);
 #ifdef TARGET_X86_64
         if (dflag == MO_64) {
-            gen_op_mov_v_reg(MO_64, cpu_T0, reg);
-            tcg_gen_bswap64_i64(cpu_T0, cpu_T0);
-            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
+            gen_op_mov_v_reg(MO_64, s->T0, reg);
+            tcg_gen_bswap64_i64(s->T0, s->T0);
+            gen_op_mov_reg_v(MO_64, reg, s->T0);
         } else
 #endif
         {
-            gen_op_mov_v_reg(MO_32, cpu_T0, reg);
-            tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
-            tcg_gen_bswap32_tl(cpu_T0, cpu_T0);
-            gen_op_mov_reg_v(MO_32, reg, cpu_T0);
+            gen_op_mov_v_reg(MO_32, s->T0, reg);
+            tcg_gen_ext32u_tl(s->T0, s->T0);
+            tcg_gen_bswap32_tl(s->T0, s->T0);
+            gen_op_mov_reg_v(MO_32, reg, s->T0);
         }
         break;
     case 0xd6: /* salc */
         if (CODE64(s))
             goto illegal_op;
-        gen_compute_eflags_c(s, cpu_T0);
-        tcg_gen_neg_tl(cpu_T0, cpu_T0);
-        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
+        gen_compute_eflags_c(s, s->T0);
+        tcg_gen_neg_tl(s->T0, s->T0);
+        gen_op_mov_reg_v(MO_8, R_EAX, s->T0);
         break;
     case 0xe0: /* loopnz */
     case 0xe1: /* loopz */
@@ -7229,7 +7243,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             if (!s->pe || s->vm86)
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
-            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
+            tcg_gen_ld32u_tl(s->T0, cpu_env,
                              offsetof(CPUX86State, ldt.selector));
             ot = mod == 3 ? dflag : MO_16;
             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
@@ -7242,7 +7256,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 gen_helper_lldt(cpu_env, cpu_tmp2_i32);
             }
             break;
@@ -7250,7 +7264,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             if (!s->pe || s->vm86)
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
-            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
+            tcg_gen_ld32u_tl(s->T0, cpu_env,
                              offsetof(CPUX86State, tr.selector));
             ot = mod == 3 ? dflag : MO_16;
             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
@@ -7263,7 +7277,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
                 gen_helper_ltr(cpu_env, cpu_tmp2_i32);
             }
             break;
@@ -7274,9 +7288,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
             gen_update_cc_op(s);
             if (op == 4) {
-                gen_helper_verr(cpu_env, cpu_T0);
+                gen_helper_verr(cpu_env, s->T0);
             } else {
-                gen_helper_verw(cpu_env, cpu_T0);
+                gen_helper_verw(cpu_env, s->T0);
             }
             set_cc_op(s, CC_OP_EFLAGS);
             break;
@@ -7291,15 +7305,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         CASE_MODRM_MEM_OP(0): /* sgdt */
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_ld32u_tl(cpu_T0,
+            tcg_gen_ld32u_tl(s->T0,
                              cpu_env, offsetof(CPUX86State, gdt.limit));
-            gen_op_st_v(s, MO_16, cpu_T0, s->A0);
+            gen_op_st_v(s, MO_16, s->T0, s->A0);
             gen_add_A0_im(s, 2);
-            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
+            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
             if (dflag == MO_16) {
-                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
+                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
             }
-            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
+            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
             break;
 
         case 0xc8: /* monitor */
@@ -7347,14 +7361,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         CASE_MODRM_MEM_OP(1): /* sidt */
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit));
-            gen_op_st_v(s, MO_16, cpu_T0, s->A0);
+            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
+            gen_op_st_v(s, MO_16, s->T0, s->A0);
             gen_add_A0_im(s, 2);
-            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
+            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
             if (dflag == MO_16) {
-                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
+                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
             }
-            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
+            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
             break;
 
         case 0xd0: /* xgetbv */
@@ -7500,11 +7514,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_lea_modrm(env, s, modrm);
             gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
             gen_add_A0_im(s, 2);
-            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
+            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
             if (dflag == MO_16) {
-                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
+                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
             }
-            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
+            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
             tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit));
             break;
 
@@ -7517,17 +7531,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_lea_modrm(env, s, modrm);
             gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
             gen_add_A0_im(s, 2);
-            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
+            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
             if (dflag == MO_16) {
-                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
+                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
             }
-            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
+            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
             tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit));
             break;
 
         CASE_MODRM_OP(4): /* smsw */
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
-            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]));
+            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
             if (CODE64(s)) {
                 mod = (modrm >> 6) & 3;
                 ot = (mod != 3 ? MO_16 : s->dflag);
@@ -7560,7 +7574,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
-            gen_helper_lmsw(cpu_env, cpu_T0);
+            gen_helper_lmsw(cpu_env, s->T0);
             gen_jmp_im(s->pc - s->cs_base);
             gen_eob(s);
             break;
@@ -7584,10 +7598,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 if (s->cpl != 0) {
                     gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                 } else {
-                    tcg_gen_mov_tl(cpu_T0, cpu_seg_base[R_GS]);
+                    tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
                     tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
                                   offsetof(CPUX86State, kernelgsbase));
-                    tcg_gen_st_tl(cpu_T0, cpu_env,
+                    tcg_gen_st_tl(s->T0, cpu_env,
                                   offsetof(CPUX86State, kernelgsbase));
                 }
                 break;
@@ -7638,16 +7652,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             rm = (modrm & 7) | REX_B(s);
 
             if (mod == 3) {
-                gen_op_mov_v_reg(MO_32, cpu_T0, rm);
+                gen_op_mov_v_reg(MO_32, s->T0, rm);
                 /* sign extend */
                 if (d_ot == MO_64) {
-                    tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
+                    tcg_gen_ext32s_tl(s->T0, s->T0);
                 }
-                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
+                gen_op_mov_reg_v(d_ot, reg, s->T0);
             } else {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, s->A0);
-                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
+                gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
+                gen_op_mov_reg_v(d_ot, reg, s->T0);
             }
         } else
 #endif
@@ -7712,9 +7726,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             t0 = tcg_temp_local_new();
             gen_update_cc_op(s);
             if (b == 0x102) {
-                gen_helper_lar(t0, cpu_env, cpu_T0);
+                gen_helper_lar(t0, cpu_env, s->T0);
             } else {
-                gen_helper_lsl(t0, cpu_env, cpu_T0);
+                gen_helper_lsl(t0, cpu_env, s->T0);
             }
             tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
             label1 = gen_new_label();
@@ -7816,16 +7830,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 }
                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
                 if (a.index >= 0) {
-                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
+                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
                 } else {
-                    tcg_gen_movi_tl(cpu_T0, 0);
+                    tcg_gen_movi_tl(s->T0, 0);
                 }
                 if (CODE64(s)) {
-                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, cpu_T0);
+                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
                     tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
                                    offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
                 } else {
-                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, cpu_T0);
+                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
                     tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
                     tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
                 }
@@ -7921,15 +7935,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 }
                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
                 if (a.index >= 0) {
-                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
+                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
                 } else {
-                    tcg_gen_movi_tl(cpu_T0, 0);
+                    tcg_gen_movi_tl(s->T0, 0);
                 }
                 if (CODE64(s)) {
-                    gen_helper_bndstx64(cpu_env, s->A0, cpu_T0,
+                    gen_helper_bndstx64(cpu_env, s->A0, s->T0,
                                         cpu_bndl[reg], cpu_bndu[reg]);
                 } else {
-                    gen_helper_bndstx32(cpu_env, s->A0, cpu_T0,
+                    gen_helper_bndstx32(cpu_env, s->A0, s->T0,
                                         cpu_bndl[reg], cpu_bndu[reg]);
                 }
             }
@@ -7973,9 +7987,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_start();
                     }
-                    gen_op_mov_v_reg(ot, cpu_T0, rm);
+                    gen_op_mov_v_reg(ot, s->T0, rm);
                     gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
-                                         cpu_T0);
+                                         s->T0);
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_end();
                     }
@@ -7985,8 +7999,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_start();
                     }
-                    gen_helper_read_crN(cpu_T0, cpu_env, tcg_const_i32(reg));
-                    gen_op_mov_reg_v(ot, rm, cpu_T0);
+                    gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg));
+                    gen_op_mov_reg_v(ot, rm, s->T0);
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_end();
                     }
@@ -8019,16 +8033,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             if (b & 2) {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
-                gen_op_mov_v_reg(ot, cpu_T0, rm);
+                gen_op_mov_v_reg(ot, s->T0, rm);
                 tcg_gen_movi_i32(cpu_tmp2_i32, reg);
-                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, cpu_T0);
+                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, s->T0);
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
                 tcg_gen_movi_i32(cpu_tmp2_i32, reg);
-                gen_helper_get_dr(cpu_T0, cpu_env, cpu_tmp2_i32);
-                gen_op_mov_reg_v(ot, rm, cpu_T0);
+                gen_helper_get_dr(s->T0, cpu_env, cpu_tmp2_i32);
+                gen_op_mov_reg_v(ot, rm, s->T0);
             }
         }
         break;
@@ -8107,8 +8121,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, mxcsr));
-            gen_op_st_v(s, MO_32, cpu_T0, s->A0);
+            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
+            gen_op_st_v(s, MO_32, s->T0, s->A0);
             break;
 
         CASE_MODRM_MEM_OP(4): /* xsave */
@@ -8287,10 +8301,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         }
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_extu(ot, cpu_T0);
-        tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
-        tcg_gen_ctpop_tl(cpu_T0, cpu_T0);
-        gen_op_mov_reg_v(ot, reg, cpu_T0);
+        gen_extu(ot, s->T0);
+        tcg_gen_mov_tl(cpu_cc_src, s->T0);
+        tcg_gen_ctpop_tl(s->T0, s->T0);
+        gen_op_mov_reg_v(ot, reg, s->T0);
 
         set_cc_op(s, CC_OP_POPCNT);
         break;
@@ -8456,7 +8470,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
         printf("ERROR addseg\n");
 #endif
 
-    cpu_T0 = tcg_temp_new();
+    dc->T0 = tcg_temp_new();
     cpu_T1 = tcg_temp_new();
     dc->A0 = tcg_temp_new();
 
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 04/13] target/i386: move cpu_T1 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (2 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 03/13] target/i386: move cpu_T0 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:48   ` Richard Henderson
  2018-09-13 14:26   ` Alex Bennée
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 05/13] target/i386: move cpu_tmp0 " Emilio G. Cota
                   ` (9 subsequent siblings)
  13 siblings, 2 replies; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 341 ++++++++++++++++++++--------------------
 1 file changed, 170 insertions(+), 171 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 73fd7e5b9a..bd27e65344 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -78,8 +78,6 @@ static TCGv cpu_regs[CPU_NB_REGS];
 static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
-/* local temps */
-static TCGv cpu_T1;
 /* local register indexes (only used inside old micro ops) */
 static TCGv cpu_tmp0, cpu_tmp4;
 static TCGv_ptr cpu_ptr0, cpu_ptr1;
@@ -139,6 +137,7 @@ typedef struct DisasContext {
     TCGv cc_srcT;
     TCGv A0;
     TCGv T0;
+    TCGv T1;
 
     sigjmp_buf jmpbuf;
 } DisasContext;
@@ -656,20 +655,20 @@ static void gen_op_update1_cc(DisasContext *s)
 
 static void gen_op_update2_cc(DisasContext *s)
 {
-    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
+    tcg_gen_mov_tl(cpu_cc_src, s->T1);
     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
 }
 
 static void gen_op_update3_cc(DisasContext *s, TCGv reg)
 {
     tcg_gen_mov_tl(cpu_cc_src2, reg);
-    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
+    tcg_gen_mov_tl(cpu_cc_src, s->T1);
     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
 }
 
 static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
 {
-    tcg_gen_and_tl(cpu_cc_dst, s->T0, cpu_T1);
+    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
 }
 
 static void gen_op_update_neg_cc(DisasContext *s)
@@ -1090,7 +1089,7 @@ static inline void gen_lods(DisasContext *s, TCGMemOp ot)
 static inline void gen_scas(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_EDI(s);
-    gen_op_ld_v(s, ot, cpu_T1, s->A0);
+    gen_op_ld_v(s, ot, s->T1, s->A0);
     gen_op(s, OP_CMPL, ot, R_EAX);
     gen_op_movl_T0_Dshift(s, ot);
     gen_op_add_reg_T0(s, s->aflag, R_EDI);
@@ -1099,7 +1098,7 @@ static inline void gen_scas(DisasContext *s, TCGMemOp ot)
 static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_EDI(s);
-    gen_op_ld_v(s, ot, cpu_T1, s->A0);
+    gen_op_ld_v(s, ot, s->T1, s->A0);
     gen_string_movl_A0_ESI(s);
     gen_op(s, OP_CMPL, ot, OR_TMP0);
     gen_op_movl_T0_Dshift(s, ot);
@@ -1274,11 +1273,11 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
     case OP_ADCL:
         gen_compute_eflags_c(s1, cpu_tmp4);
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_add_tl(s1->T0, cpu_tmp4, cpu_T1);
+            tcg_gen_add_tl(s1->T0, cpu_tmp4, s1->T1);
             tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_add_tl(s1->T0, s1->T0, cpu_T1);
+            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
             tcg_gen_add_tl(s1->T0, s1->T0, cpu_tmp4);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
@@ -1288,12 +1287,12 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
     case OP_SBBL:
         gen_compute_eflags_c(s1, cpu_tmp4);
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_add_tl(s1->T0, cpu_T1, cpu_tmp4);
+            tcg_gen_add_tl(s1->T0, s1->T1, cpu_tmp4);
             tcg_gen_neg_tl(s1->T0, s1->T0);
             tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_T1);
+            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
             tcg_gen_sub_tl(s1->T0, s1->T0, cpu_tmp4);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
@@ -1302,10 +1301,10 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
         break;
     case OP_ADDL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, cpu_T1,
+            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_add_tl(s1->T0, s1->T0, cpu_T1);
+            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
         gen_op_update2_cc(s1);
@@ -1313,13 +1312,13 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
         break;
     case OP_SUBL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_neg_tl(s1->T0, cpu_T1);
+            tcg_gen_neg_tl(s1->T0, s1->T1);
             tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                         s1->mem_index, ot | MO_LE);
-            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, cpu_T1);
+            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
         } else {
             tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
-            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_T1);
+            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
         gen_op_update2_cc(s1);
@@ -1328,10 +1327,10 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
     default:
     case OP_ANDL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, cpu_T1,
+            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_and_tl(s1->T0, s1->T0, cpu_T1);
+            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
         gen_op_update1_cc(s1);
@@ -1339,10 +1338,10 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
         break;
     case OP_ORL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, cpu_T1,
+            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_or_tl(s1->T0, s1->T0, cpu_T1);
+            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
         gen_op_update1_cc(s1);
@@ -1350,19 +1349,19 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
         break;
     case OP_XORL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, cpu_T1,
+            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_xor_tl(s1->T0, s1->T0, cpu_T1);
+            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
         gen_op_update1_cc(s1);
         set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_CMPL:
-        tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
+        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
         tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
-        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, cpu_T1);
+        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
         set_cc_op(s1, CC_OP_SUBB + ot);
         break;
     }
@@ -1447,28 +1446,28 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
         gen_op_mov_v_reg(ot, s->T0, op1);
     }
 
-    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
-    tcg_gen_subi_tl(cpu_tmp0, cpu_T1, 1);
+    tcg_gen_andi_tl(s->T1, s->T1, mask);
+    tcg_gen_subi_tl(cpu_tmp0, s->T1, 1);
 
     if (is_right) {
         if (is_arith) {
             gen_exts(ot, s->T0);
             tcg_gen_sar_tl(cpu_tmp0, s->T0, cpu_tmp0);
-            tcg_gen_sar_tl(s->T0, s->T0, cpu_T1);
+            tcg_gen_sar_tl(s->T0, s->T0, s->T1);
         } else {
             gen_extu(ot, s->T0);
             tcg_gen_shr_tl(cpu_tmp0, s->T0, cpu_tmp0);
-            tcg_gen_shr_tl(s->T0, s->T0, cpu_T1);
+            tcg_gen_shr_tl(s->T0, s->T0, s->T1);
         }
     } else {
         tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
-        tcg_gen_shl_tl(s->T0, s->T0, cpu_T1);
+        tcg_gen_shl_tl(s->T0, s->T0, s->T1);
     }
 
     /* store */
     gen_op_st_rm_T0_A0(s, ot, op1);
 
-    gen_shift_flags(s, ot, s->T0, cpu_tmp0, cpu_T1, is_right);
+    gen_shift_flags(s, ot, s->T0, cpu_tmp0, s->T1, is_right);
 }
 
 static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
@@ -1523,7 +1522,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
         gen_op_mov_v_reg(ot, s->T0, op1);
     }
 
-    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
+    tcg_gen_andi_tl(s->T1, s->T1, mask);
 
     switch (ot) {
     case MO_8:
@@ -1539,7 +1538,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
 #ifdef TARGET_X86_64
     case MO_32:
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
+        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
         if (is_right) {
             tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
         } else {
@@ -1550,9 +1549,9 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
 #endif
     default:
         if (is_right) {
-            tcg_gen_rotr_tl(s->T0, s->T0, cpu_T1);
+            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
         } else {
-            tcg_gen_rotl_tl(s->T0, s->T0, cpu_T1);
+            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
         }
         break;
     }
@@ -1584,7 +1583,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
        exactly as we computed above.  */
     t0 = tcg_const_i32(0);
     t1 = tcg_temp_new_i32();
-    tcg_gen_trunc_tl_i32(t1, cpu_T1);
+    tcg_gen_trunc_tl_i32(t1, s->T1);
     tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX); 
     tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
@@ -1689,17 +1688,17 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
     if (is_right) {
         switch (ot) {
         case MO_8:
-            gen_helper_rcrb(s->T0, cpu_env, s->T0, cpu_T1);
+            gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
             break;
         case MO_16:
-            gen_helper_rcrw(s->T0, cpu_env, s->T0, cpu_T1);
+            gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
             break;
         case MO_32:
-            gen_helper_rcrl(s->T0, cpu_env, s->T0, cpu_T1);
+            gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
             break;
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_helper_rcrq(s->T0, cpu_env, s->T0, cpu_T1);
+            gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
             break;
 #endif
         default:
@@ -1708,17 +1707,17 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
     } else {
         switch (ot) {
         case MO_8:
-            gen_helper_rclb(s->T0, cpu_env, s->T0, cpu_T1);
+            gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
             break;
         case MO_16:
-            gen_helper_rclw(s->T0, cpu_env, s->T0, cpu_T1);
+            gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
             break;
         case MO_32:
-            gen_helper_rcll(s->T0, cpu_env, s->T0, cpu_T1);
+            gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
             break;
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_helper_rclq(s->T0, cpu_env, s->T0, cpu_T1);
+            gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
             break;
 #endif
         default:
@@ -1752,11 +1751,11 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
            This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
            portion by constructing it as a 32-bit value.  */
         if (is_right) {
-            tcg_gen_deposit_tl(cpu_tmp0, s->T0, cpu_T1, 16, 16);
-            tcg_gen_mov_tl(cpu_T1, s->T0);
+            tcg_gen_deposit_tl(cpu_tmp0, s->T0, s->T1, 16, 16);
+            tcg_gen_mov_tl(s->T1, s->T0);
             tcg_gen_mov_tl(s->T0, cpu_tmp0);
         } else {
-            tcg_gen_deposit_tl(cpu_T1, s->T0, cpu_T1, 16, 16);
+            tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
         }
         /* FALLTHRU */
 #ifdef TARGET_X86_64
@@ -1764,11 +1763,11 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
         tcg_gen_subi_tl(cpu_tmp0, count, 1);
         if (is_right) {
-            tcg_gen_concat_tl_i64(s->T0, s->T0, cpu_T1);
+            tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
             tcg_gen_shr_i64(cpu_tmp0, s->T0, cpu_tmp0);
             tcg_gen_shr_i64(s->T0, s->T0, count);
         } else {
-            tcg_gen_concat_tl_i64(s->T0, cpu_T1, s->T0);
+            tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
             tcg_gen_shl_i64(cpu_tmp0, s->T0, cpu_tmp0);
             tcg_gen_shl_i64(s->T0, s->T0, count);
             tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
@@ -1783,24 +1782,24 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 
             tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
             tcg_gen_shr_tl(s->T0, s->T0, count);
-            tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
+            tcg_gen_shl_tl(s->T1, s->T1, cpu_tmp4);
         } else {
             tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
             if (ot == MO_16) {
                 /* Only needed if count > 16, for Intel behaviour.  */
                 tcg_gen_subfi_tl(cpu_tmp4, 33, count);
-                tcg_gen_shr_tl(cpu_tmp4, cpu_T1, cpu_tmp4);
+                tcg_gen_shr_tl(cpu_tmp4, s->T1, cpu_tmp4);
                 tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
             }
 
             tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
             tcg_gen_shl_tl(s->T0, s->T0, count);
-            tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
+            tcg_gen_shr_tl(s->T1, s->T1, cpu_tmp4);
         }
         tcg_gen_movi_tl(cpu_tmp4, 0);
-        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
-                           cpu_tmp4, cpu_T1);
-        tcg_gen_or_tl(s->T0, s->T0, cpu_T1);
+        tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, cpu_tmp4,
+                           cpu_tmp4, s->T1);
+        tcg_gen_or_tl(s->T0, s->T0, s->T1);
         break;
     }
 
@@ -1814,7 +1813,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
 {
     if (s != OR_TMP1)
-        gen_op_mov_v_reg(ot, cpu_T1, s);
+        gen_op_mov_v_reg(ot, s1->T1, s);
     switch(op) {
     case OP_ROL:
         gen_rot_rm_T1(s1, ot, d, 0);
@@ -1862,7 +1861,7 @@ static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
         break;
     default:
         /* currently not optimized */
-        tcg_gen_movi_tl(cpu_T1, c);
+        tcg_gen_movi_tl(s1->T1, c);
         gen_shift(s1, op, ot, d, OR_TMP1);
         break;
     }
@@ -2242,7 +2241,7 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
 
     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
 
-    cc = gen_prepare_cc(s, b, cpu_T1);
+    cc = gen_prepare_cc(s, b, s->T1);
     if (cc.mask != -1) {
         TCGv t0 = tcg_temp_new();
         tcg_gen_andi_tl(t0, cc.reg, cc.mask);
@@ -2416,8 +2415,8 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
     int size = 1 << d_ot;
 
     /* Push BP; compute FrameTemp into T1.  */
-    tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
-    gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
+    tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
+    gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
     gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
 
     level &= 31;
@@ -2430,23 +2429,23 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
             gen_op_ld_v(s, d_ot, cpu_tmp0, s->A0);
 
-            tcg_gen_subi_tl(s->A0, cpu_T1, size * i);
+            tcg_gen_subi_tl(s->A0, s->T1, size * i);
             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
             gen_op_st_v(s, d_ot, cpu_tmp0, s->A0);
         }
 
         /* Push the current FrameTemp as the last level.  */
-        tcg_gen_subi_tl(s->A0, cpu_T1, size * level);
+        tcg_gen_subi_tl(s->A0, s->T1, size * level);
         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
-        gen_op_st_v(s, d_ot, cpu_T1, s->A0);
+        gen_op_st_v(s, d_ot, s->T1, s->A0);
     }
 
     /* Copy the FrameTemp value to EBP.  */
-    gen_op_mov_reg_v(a_ot, R_EBP, cpu_T1);
+    gen_op_mov_reg_v(a_ot, R_EBP, s->T1);
 
     /* Compute the final value of ESP.  */
-    tcg_gen_subi_tl(cpu_T1, cpu_T1, esp_addend + size * level);
-    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
+    tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
+    gen_op_mov_reg_v(a_ot, R_ESP, s->T1);
 }
 
 static void gen_leave(DisasContext *s)
@@ -2457,10 +2456,10 @@ static void gen_leave(DisasContext *s)
     gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
     gen_op_ld_v(s, d_ot, s->T0, s->A0);
 
-    tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
+    tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
 
     gen_op_mov_reg_v(d_ot, R_EBP, s->T0);
-    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
+    gen_op_mov_reg_v(a_ot, R_ESP, s->T1);
 }
 
 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
@@ -3854,10 +3853,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
                                        s->A0, bound);
                     tcg_temp_free(bound);
-                    tcg_gen_movi_tl(cpu_T1, 1);
-                    tcg_gen_shl_tl(cpu_T1, cpu_T1, s->A0);
-                    tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
-                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
+                    tcg_gen_movi_tl(s->T1, 1);
+                    tcg_gen_shl_tl(s->T1, s->T1, s->A0);
+                    tcg_gen_subi_tl(s->T1, s->T1, 1);
+                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
 
                     gen_op_mov_reg_v(ot, reg, s->T0);
                     gen_op_update1_cc(s);
@@ -3873,19 +3872,19 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-                tcg_gen_ext8u_tl(cpu_T1, cpu_regs[s->vex_v]);
+                tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
                 {
                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                     /* Note that since we're using BMILG (in order to get O
                        cleared) we need to store the inverse into C.  */
                     tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
-                                       cpu_T1, bound);
-                    tcg_gen_movcond_tl(TCG_COND_GT, cpu_T1, cpu_T1,
-                                       bound, bound, cpu_T1);
+                                       s->T1, bound);
+                    tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
+                                       bound, bound, s->T1);
                     tcg_temp_free(bound);
                 }
                 tcg_gen_movi_tl(s->A0, -1);
-                tcg_gen_shl_tl(s->A0, s->A0, cpu_T1);
+                tcg_gen_shl_tl(s->A0, s->A0, s->T1);
                 tcg_gen_andc_tl(s->T0, s->T0, s->A0);
                 gen_op_mov_reg_v(ot, reg, s->T0);
                 gen_op_update1_cc(s);
@@ -3911,10 +3910,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     break;
 #ifdef TARGET_X86_64
                 case MO_64:
-                    tcg_gen_mulu2_i64(s->T0, cpu_T1,
+                    tcg_gen_mulu2_i64(s->T0, s->T1,
                                       s->T0, cpu_regs[R_EDX]);
                     tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
-                    tcg_gen_mov_i64(cpu_regs[reg], cpu_T1);
+                    tcg_gen_mov_i64(cpu_regs[reg], s->T1);
                     break;
 #endif
                 }
@@ -3931,11 +3930,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
-                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
+                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
                 } else {
-                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
+                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
                 }
-                gen_helper_pdep(cpu_regs[reg], s->T0, cpu_T1);
+                gen_helper_pdep(cpu_regs[reg], s->T0, s->T1);
                 break;
 
             case 0x2f5: /* pext Gy, By, Ey */
@@ -3949,11 +3948,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
-                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
+                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
                 } else {
-                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
+                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
                 }
-                gen_helper_pext(cpu_regs[reg], s->T0, cpu_T1);
+                gen_helper_pext(cpu_regs[reg], s->T0, s->T1);
                 break;
 
             case 0x1f6: /* adcx Gy, Ey */
@@ -4045,22 +4044,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 if (ot == MO_64) {
-                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 63);
+                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
                 } else {
-                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 31);
+                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
                 }
                 if (b == 0x1f7) {
-                    tcg_gen_shl_tl(s->T0, s->T0, cpu_T1);
+                    tcg_gen_shl_tl(s->T0, s->T0, s->T1);
                 } else if (b == 0x2f7) {
                     if (ot != MO_64) {
                         tcg_gen_ext32s_tl(s->T0, s->T0);
                     }
-                    tcg_gen_sar_tl(s->T0, s->T0, cpu_T1);
+                    tcg_gen_sar_tl(s->T0, s->T0, s->T1);
                 } else {
                     if (ot != MO_64) {
                         tcg_gen_ext32u_tl(s->T0, s->T0);
                     }
-                    tcg_gen_shr_tl(s->T0, s->T0, cpu_T1);
+                    tcg_gen_shr_tl(s->T0, s->T0, s->T1);
                 }
                 gen_op_mov_reg_v(ot, reg, s->T0);
                 break;
@@ -4080,16 +4079,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
                 switch (reg & 7) {
                 case 1: /* blsr By,Ey */
-                    tcg_gen_subi_tl(cpu_T1, s->T0, 1);
-                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
+                    tcg_gen_subi_tl(s->T1, s->T0, 1);
+                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
                     break;
                 case 2: /* blsmsk By,Ey */
-                    tcg_gen_subi_tl(cpu_T1, s->T0, 1);
-                    tcg_gen_xor_tl(s->T0, s->T0, cpu_T1);
+                    tcg_gen_subi_tl(s->T1, s->T0, 1);
+                    tcg_gen_xor_tl(s->T0, s->T0, s->T1);
                     break;
                 case 3: /* blsi By, Ey */
-                    tcg_gen_neg_tl(cpu_T1, s->T0);
-                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
+                    tcg_gen_neg_tl(s->T1, s->T0);
+                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
                     break;
                 default:
                     goto unknown_op;
@@ -4677,7 +4676,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 } else {
                     opreg = rm;
                 }
-                gen_op_mov_v_reg(ot, cpu_T1, reg);
+                gen_op_mov_v_reg(ot, s->T1, reg);
                 gen_op(s, op, ot, opreg);
                 break;
             case 1: /* OP Gv, Ev */
@@ -4687,17 +4686,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
                     gen_lea_modrm(env, s, modrm);
-                    gen_op_ld_v(s, ot, cpu_T1, s->A0);
+                    gen_op_ld_v(s, ot, s->T1, s->A0);
                 } else if (op == OP_XORL && rm == reg) {
                     goto xor_zero;
                 } else {
-                    gen_op_mov_v_reg(ot, cpu_T1, rm);
+                    gen_op_mov_v_reg(ot, s->T1, rm);
                 }
                 gen_op(s, op, ot, reg);
                 break;
             case 2: /* OP A, Iv */
                 val = insn_get(env, s, ot);
-                tcg_gen_movi_tl(cpu_T1, val);
+                tcg_gen_movi_tl(s->T1, val);
                 gen_op(s, op, ot, OR_EAX);
                 break;
             }
@@ -4743,7 +4742,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 val = (int8_t)insn_get(env, s, MO_8);
                 break;
             }
-            tcg_gen_movi_tl(cpu_T1, val);
+            tcg_gen_movi_tl(s->T1, val);
             gen_op(s, op, ot, opreg);
         }
         break;
@@ -4783,7 +4782,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         switch(op) {
         case 0: /* test */
             val = insn_get(env, s, ot);
-            tcg_gen_movi_tl(cpu_T1, val);
+            tcg_gen_movi_tl(s->T1, val);
             gen_op_testl_T0_T1_cc(s);
             set_cc_op(s, CC_OP_LOGICB + ot);
             break;
@@ -4847,22 +4846,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         case 4: /* mul */
             switch(ot) {
             case MO_8:
-                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
+                gen_op_mov_v_reg(MO_8, s->T1, R_EAX);
                 tcg_gen_ext8u_tl(s->T0, s->T0);
-                tcg_gen_ext8u_tl(cpu_T1, cpu_T1);
+                tcg_gen_ext8u_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
                 gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                 tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
                 set_cc_op(s, CC_OP_MULB);
                 break;
             case MO_16:
-                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
+                gen_op_mov_v_reg(MO_16, s->T1, R_EAX);
                 tcg_gen_ext16u_tl(s->T0, s->T0);
-                tcg_gen_ext16u_tl(cpu_T1, cpu_T1);
+                tcg_gen_ext16u_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
                 gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                 tcg_gen_shri_tl(s->T0, s->T0, 16);
@@ -4896,11 +4895,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         case 5: /* imul */
             switch(ot) {
             case MO_8:
-                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
+                gen_op_mov_v_reg(MO_8, s->T1, R_EAX);
                 tcg_gen_ext8s_tl(s->T0, s->T0);
-                tcg_gen_ext8s_tl(cpu_T1, cpu_T1);
+                tcg_gen_ext8s_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
                 gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                 tcg_gen_ext8s_tl(cpu_tmp0, s->T0);
@@ -4908,11 +4907,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 set_cc_op(s, CC_OP_MULB);
                 break;
             case MO_16:
-                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
+                gen_op_mov_v_reg(MO_16, s->T1, R_EAX);
                 tcg_gen_ext16s_tl(s->T0, s->T0);
-                tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
+                tcg_gen_ext16s_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
                 gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                 tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
@@ -5041,25 +5040,25 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 tcg_gen_ext16u_tl(s->T0, s->T0);
             }
             next_eip = s->pc - s->cs_base;
-            tcg_gen_movi_tl(cpu_T1, next_eip);
-            gen_push_v(s, cpu_T1);
+            tcg_gen_movi_tl(s->T1, next_eip);
+            gen_push_v(s, s->T1);
             gen_op_jmp_v(s->T0);
             gen_bnd_jmp(s);
             gen_jr(s, s->T0);
             break;
         case 3: /* lcall Ev */
-            gen_op_ld_v(s, ot, cpu_T1, s->A0);
+            gen_op_ld_v(s, ot, s->T1, s->A0);
             gen_add_A0_im(s, 1 << ot);
             gen_op_ld_v(s, MO_16, s->T0, s->A0);
         do_lcall:
             if (s->pe && !s->vm86) {
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
+                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, s->T1,
                                            tcg_const_i32(dflag - 1),
                                            tcg_const_tl(s->pc - s->cs_base));
             } else {
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T1,
+                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, s->T1,
                                       tcg_const_i32(dflag - 1),
                                       tcg_const_i32(s->pc - s->cs_base));
             }
@@ -5075,17 +5074,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_jr(s, s->T0);
             break;
         case 5: /* ljmp Ev */
-            gen_op_ld_v(s, ot, cpu_T1, s->A0);
+            gen_op_ld_v(s, ot, s->T1, s->A0);
             gen_add_A0_im(s, 1 << ot);
             gen_op_ld_v(s, MO_16, s->T0, s->A0);
         do_ljmp:
             if (s->pe && !s->vm86) {
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
+                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, s->T1,
                                           tcg_const_tl(s->pc - s->cs_base));
             } else {
                 gen_op_movl_seg_T0_vm(s, R_CS);
-                gen_op_jmp_v(cpu_T1);
+                gen_op_jmp_v(s->T1);
             }
             tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
             gen_jr(s, cpu_tmp4);
@@ -5106,7 +5105,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         reg = ((modrm >> 3) & 7) | rex_r;
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_op_mov_v_reg(ot, cpu_T1, reg);
+        gen_op_mov_v_reg(ot, s->T1, reg);
         gen_op_testl_T0_T1_cc(s);
         set_cc_op(s, CC_OP_LOGICB + ot);
         break;
@@ -5117,7 +5116,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         val = insn_get(env, s, ot);
 
         gen_op_mov_v_reg(ot, s->T0, OR_EAX);
-        tcg_gen_movi_tl(cpu_T1, val);
+        tcg_gen_movi_tl(s->T1, val);
         gen_op_testl_T0_T1_cc(s);
         set_cc_op(s, CC_OP_LOGICB + ot);
         break;
@@ -5183,25 +5182,25 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
         if (b == 0x69) {
             val = insn_get(env, s, ot);
-            tcg_gen_movi_tl(cpu_T1, val);
+            tcg_gen_movi_tl(s->T1, val);
         } else if (b == 0x6b) {
             val = (int8_t)insn_get(env, s, MO_8);
-            tcg_gen_movi_tl(cpu_T1, val);
+            tcg_gen_movi_tl(s->T1, val);
         } else {
-            gen_op_mov_v_reg(ot, cpu_T1, reg);
+            gen_op_mov_v_reg(ot, s->T1, reg);
         }
         switch (ot) {
 #ifdef TARGET_X86_64
         case MO_64:
-            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, s->T0, cpu_T1);
+            tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
             tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
-            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T1);
+            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
             break;
 #endif
         case MO_32:
             tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
+            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
             tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
                               cpu_tmp2_i32, cpu_tmp3_i32);
             tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
@@ -5212,9 +5211,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             break;
         default:
             tcg_gen_ext16s_tl(s->T0, s->T0);
-            tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
+            tcg_gen_ext16s_tl(s->T1, s->T1);
             /* XXX: use 32 bit mul which could be faster */
-            tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
+            tcg_gen_mul_tl(s->T0, s->T0, s->T1);
             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
             tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
             tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
@@ -5232,22 +5231,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_op_mov_v_reg(ot, s->T0, reg);
         if (mod == 3) {
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_v_reg(ot, cpu_T1, rm);
-            tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
-            gen_op_mov_reg_v(ot, reg, cpu_T1);
+            gen_op_mov_v_reg(ot, s->T1, rm);
+            tcg_gen_add_tl(s->T0, s->T0, s->T1);
+            gen_op_mov_reg_v(ot, reg, s->T1);
             gen_op_mov_reg_v(ot, rm, s->T0);
         } else {
             gen_lea_modrm(env, s, modrm);
             if (s->prefix & PREFIX_LOCK) {
-                tcg_gen_atomic_fetch_add_tl(cpu_T1, s->A0, s->T0,
+                tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
                                             s->mem_index, ot | MO_LE);
-                tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
+                tcg_gen_add_tl(s->T0, s->T0, s->T1);
             } else {
-                gen_op_ld_v(s, ot, cpu_T1, s->A0);
-                tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
+                gen_op_ld_v(s, ot, s->T1, s->A0);
+                tcg_gen_add_tl(s->T0, s->T0, s->T1);
                 gen_op_st_v(s, ot, s->T0, s->A0);
             }
-            gen_op_mov_reg_v(ot, reg, cpu_T1);
+            gen_op_mov_reg_v(ot, reg, s->T1);
         }
         gen_op_update2_cc(s);
         set_cc_op(s, CC_OP_ADDB + ot);
@@ -5653,16 +5652,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             rm = (modrm & 7) | REX_B(s);
         do_xchg_reg:
             gen_op_mov_v_reg(ot, s->T0, reg);
-            gen_op_mov_v_reg(ot, cpu_T1, rm);
+            gen_op_mov_v_reg(ot, s->T1, rm);
             gen_op_mov_reg_v(ot, rm, s->T0);
-            gen_op_mov_reg_v(ot, reg, cpu_T1);
+            gen_op_mov_reg_v(ot, reg, s->T1);
         } else {
             gen_lea_modrm(env, s, modrm);
             gen_op_mov_v_reg(ot, s->T0, reg);
             /* for xchg, lock is implicit */
-            tcg_gen_atomic_xchg_tl(cpu_T1, s->A0, s->T0,
+            tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
                                    s->mem_index, ot | MO_LE);
-            gen_op_mov_reg_v(ot, reg, cpu_T1);
+            gen_op_mov_reg_v(ot, reg, s->T1);
         }
         break;
     case 0xc4: /* les Gv */
@@ -5689,13 +5688,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (mod == 3)
             goto illegal_op;
         gen_lea_modrm(env, s, modrm);
-        gen_op_ld_v(s, ot, cpu_T1, s->A0);
+        gen_op_ld_v(s, ot, s->T1, s->A0);
         gen_add_A0_im(s, 1 << ot);
         /* load the segment first to handle exceptions properly */
         gen_op_ld_v(s, MO_16, s->T0, s->A0);
         gen_movl_seg_T0(s, op);
         /* then put the data */
-        gen_op_mov_reg_v(ot, reg, cpu_T1);
+        gen_op_mov_reg_v(ot, reg, s->T1);
         if (s->base.is_jmp) {
             gen_jmp_im(s->pc - s->cs_base);
             gen_eob(s);
@@ -5774,7 +5773,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         } else {
             opreg = rm;
         }
-        gen_op_mov_v_reg(ot, cpu_T1, reg);
+        gen_op_mov_v_reg(ot, s->T1, reg);
 
         if (shift) {
             TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
@@ -6387,8 +6386,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_io_start();
 	}
         tcg_gen_movi_i32(cpu_tmp2_i32, val);
-        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
-        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
+        gen_helper_in_func(ot, s->T1, cpu_tmp2_i32);
+        gen_op_mov_reg_v(ot, R_EAX, s->T1);
         gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
@@ -6402,13 +6401,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         tcg_gen_movi_tl(s->T0, val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
-        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
+        gen_op_mov_v_reg(ot, s->T1, R_EAX);
 
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
         tcg_gen_movi_i32(cpu_tmp2_i32, val);
-        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
+        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
         gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
         gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
@@ -6426,8 +6425,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_io_start();
 	}
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
-        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
+        gen_helper_in_func(ot, s->T1, cpu_tmp2_i32);
+        gen_op_mov_reg_v(ot, R_EAX, s->T1);
         gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
@@ -6440,13 +6439,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
-        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
+        gen_op_mov_v_reg(ot, s->T1, R_EAX);
 
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
+        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
         gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
         gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
@@ -6552,7 +6551,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             selector = insn_get(env, s, MO_16);
 
             tcg_gen_movi_tl(s->T0, selector);
-            tcg_gen_movi_tl(cpu_T1, offset);
+            tcg_gen_movi_tl(s->T1, offset);
         }
         goto do_lcall;
     case 0xe9: /* jmp im */
@@ -6581,7 +6580,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             selector = insn_get(env, s, MO_16);
 
             tcg_gen_movi_tl(s->T0, selector);
-            tcg_gen_movi_tl(cpu_T1, offset);
+            tcg_gen_movi_tl(s->T1, offset);
         }
         goto do_ljmp;
     case 0xeb: /* jmp Jb */
@@ -6753,7 +6752,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         }
         /* load shift */
         val = x86_ldub_code(env, s);
-        tcg_gen_movi_tl(cpu_T1, val);
+        tcg_gen_movi_tl(s->T1, val);
         if (op < 4)
             goto unknown_op;
         op -= 4;
@@ -6775,12 +6774,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
-        gen_op_mov_v_reg(MO_32, cpu_T1, reg);
+        gen_op_mov_v_reg(MO_32, s->T1, reg);
         if (mod != 3) {
             AddressParts a = gen_lea_modrm_0(env, s, modrm);
             /* specific case: we need to add a displacement */
-            gen_exts(ot, cpu_T1);
-            tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
+            gen_exts(ot, s->T1);
+            tcg_gen_sari_tl(cpu_tmp0, s->T1, 3 + ot);
             tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
             tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), cpu_tmp0);
             gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
@@ -6791,9 +6790,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_op_mov_v_reg(ot, s->T0, rm);
         }
     bt_op:
-        tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
+        tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
         tcg_gen_movi_tl(cpu_tmp0, 1);
-        tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
+        tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, s->T1);
         if (s->prefix & PREFIX_LOCK) {
             switch (op) {
             case 0: /* bt */
@@ -6816,9 +6815,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                                             s->mem_index, ot | MO_LE);
                 break;
             }
-            tcg_gen_shr_tl(cpu_tmp4, s->T0, cpu_T1);
+            tcg_gen_shr_tl(cpu_tmp4, s->T0, s->T1);
         } else {
-            tcg_gen_shr_tl(cpu_tmp4, s->T0, cpu_T1);
+            tcg_gen_shr_tl(cpu_tmp4, s->T0, s->T1);
             switch (op) {
             case 0: /* bt */
                 /* Data already loaded; nothing to do.  */
@@ -6914,8 +6913,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             if (b & 1) {
                 /* For bsr, return the bit index of the first 1 bit,
                    not the count of leading zeros.  */
-                tcg_gen_xori_tl(cpu_T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
-                tcg_gen_clz_tl(s->T0, s->T0, cpu_T1);
+                tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
+                tcg_gen_clz_tl(s->T0, s->T0, s->T1);
                 tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
             } else {
                 tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
@@ -7512,14 +7511,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
             gen_lea_modrm(env, s, modrm);
-            gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
+            gen_op_ld_v(s, MO_16, s->T1, s->A0);
             gen_add_A0_im(s, 2);
             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
             if (dflag == MO_16) {
                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
             }
             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
-            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit));
+            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
             break;
 
         CASE_MODRM_MEM_OP(3): /* lidt */
@@ -7529,14 +7528,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
             gen_lea_modrm(env, s, modrm);
-            gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
+            gen_op_ld_v(s, MO_16, s->T1, s->A0);
             gen_add_A0_im(s, 2);
             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
             if (dflag == MO_16) {
                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
             }
             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
-            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit));
+            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
             break;
 
         CASE_MODRM_OP(4): /* smsw */
@@ -8471,7 +8470,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
 #endif
 
     dc->T0 = tcg_temp_new();
-    cpu_T1 = tcg_temp_new();
+    dc->T1 = tcg_temp_new();
     dc->A0 = tcg_temp_new();
 
     cpu_tmp0 = tcg_temp_new();
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 05/13] target/i386: move cpu_tmp0 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (3 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 04/13] target/i386: move cpu_T1 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:51   ` Richard Henderson
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 06/13] target/i386: move cpu_tmp4 " Emilio G. Cota
                   ` (8 subsequent siblings)
  13 siblings, 1 reply; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 282 ++++++++++++++++++++--------------------
 1 file changed, 144 insertions(+), 138 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index bd27e65344..873231fb44 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -78,8 +78,8 @@ static TCGv cpu_regs[CPU_NB_REGS];
 static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
-/* local register indexes (only used inside old micro ops) */
-static TCGv cpu_tmp0, cpu_tmp4;
+
+static TCGv cpu_tmp4;
 static TCGv_ptr cpu_ptr0, cpu_ptr1;
 static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;
@@ -139,6 +139,9 @@ typedef struct DisasContext {
     TCGv T0;
     TCGv T1;
 
+    /* TCG local register indexes (only used inside old micro ops) */
+    TCGv tmp0;
+
     sigjmp_buf jmpbuf;
 } DisasContext;
 
@@ -406,16 +409,17 @@ static inline void gen_op_jmp_v(TCGv dest)
     tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
 }
 
-static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
+static inline
+void gen_op_add_reg_im(DisasContext *s, TCGMemOp size, int reg, int32_t val)
 {
-    tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
-    gen_op_mov_reg_v(size, reg, cpu_tmp0);
+    tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
+    gen_op_mov_reg_v(size, reg, s->tmp0);
 }
 
 static inline void gen_op_add_reg_T0(DisasContext *s, TCGMemOp size, int reg)
 {
-    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], s->T0);
-    gen_op_mov_reg_v(size, reg, cpu_tmp0);
+    tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
+    gen_op_mov_reg_v(size, reg, s->tmp0);
 }
 
 static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
@@ -437,10 +441,10 @@ static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
     }
 }
 
-static inline void gen_jmp_im(target_ulong pc)
+static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
 {
-    tcg_gen_movi_tl(cpu_tmp0, pc);
-    gen_op_jmp_v(cpu_tmp0);
+    tcg_gen_movi_tl(s->tmp0, pc);
+    gen_op_jmp_v(s->tmp0);
 }
 
 /* Compute SEG:REG into A0.  SEG is selected from the override segment
@@ -556,18 +560,20 @@ static void gen_exts(TCGMemOp ot, TCGv reg)
     gen_ext_tl(reg, reg, ot, true);
 }
 
-static inline void gen_op_jnz_ecx(TCGMemOp size, TCGLabel *label1)
+static inline
+void gen_op_jnz_ecx(DisasContext *s, TCGMemOp size, TCGLabel *label1)
 {
-    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
-    gen_extu(size, cpu_tmp0);
-    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
+    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
+    gen_extu(size, s->tmp0);
+    tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
 }
 
-static inline void gen_op_jz_ecx(TCGMemOp size, TCGLabel *label1)
+static inline
+void gen_op_jz_ecx(DisasContext *s, TCGMemOp size, TCGLabel *label1)
 {
-    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
-    gen_extu(size, cpu_tmp0);
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
+    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
+    gen_extu(size, s->tmp0);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
 }
 
 static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
@@ -627,7 +633,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
     }
     if(s->flags & HF_SVMI_MASK) {
         gen_update_cc_op(s);
-        gen_jmp_im(cur_eip);
+        gen_jmp_im(s, cur_eip);
         svm_flags |= (1 << (4 + ot));
         next_eip = s->pc - s->cs_base;
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
@@ -743,9 +749,9 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
     case CC_OP_SUBB ... CC_OP_SUBQ:
         /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
         size = s->cc_op - CC_OP_SUBB;
-        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
         /* If no temporary was used, be careful not to alias t1 and t0.  */
-        t0 = t1 == cpu_cc_src ? cpu_tmp0 : reg;
+        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
         tcg_gen_mov_tl(t0, s->cc_srcT);
         gen_extu(size, t0);
         goto add_sub;
@@ -753,7 +759,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
     case CC_OP_ADDB ... CC_OP_ADDQ:
         /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
         size = s->cc_op - CC_OP_ADDB;
-        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
         t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
     add_sub:
         return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
@@ -905,7 +911,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         case JCC_BE:
             tcg_gen_mov_tl(cpu_tmp4, s->cc_srcT);
             gen_extu(size, cpu_tmp4);
-            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
             cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
                                .reg2 = t0, .mask = -1, .use_reg2 = true };
             break;
@@ -918,7 +924,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         fast_jcc_l:
             tcg_gen_mov_tl(cpu_tmp4, s->cc_srcT);
             gen_exts(size, cpu_tmp4);
-            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
+            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
             cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
                                .reg2 = t0, .mask = -1, .use_reg2 = true };
             break;
@@ -955,7 +961,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         case JCC_L:
             gen_compute_eflags(s);
             if (reg == cpu_cc_src) {
-                reg = cpu_tmp0;
+                reg = s->tmp0;
             }
             tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
             tcg_gen_xor_tl(reg, reg, cpu_cc_src);
@@ -966,7 +972,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         case JCC_LE:
             gen_compute_eflags(s);
             if (reg == cpu_cc_src) {
-                reg = cpu_tmp0;
+                reg = s->tmp0;
             }
             tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
             tcg_gen_xor_tl(reg, reg, cpu_cc_src);
@@ -1061,7 +1067,7 @@ static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
 {
     TCGLabel *l1 = gen_new_label();
     TCGLabel *l2 = gen_new_label();
-    gen_op_jnz_ecx(s->aflag, l1);
+    gen_op_jnz_ecx(s, s->aflag, l1);
     gen_set_label(l2);
     gen_jmp_tb(s, next_eip, 1);
     gen_set_label(l1);
@@ -1171,11 +1177,11 @@ static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
     gen_update_cc_op(s);                                                      \
     l2 = gen_jz_ecx_string(s, next_eip);                                      \
     gen_ ## op(s, ot);                                                        \
-    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
+    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
     /* a loop would cause two single step exceptions if ECX = 1               \
        before rep string_insn */                                              \
     if (s->repz_opt)                                                          \
-        gen_op_jz_ecx(s->aflag, l2);                                          \
+        gen_op_jz_ecx(s, s->aflag, l2);                                       \
     gen_jmp(s, cur_eip);                                                      \
 }
 
@@ -1189,11 +1195,11 @@ static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
     gen_update_cc_op(s);                                                      \
     l2 = gen_jz_ecx_string(s, next_eip);                                      \
     gen_ ## op(s, ot);                                                        \
-    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
+    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
     gen_update_cc_op(s);                                                      \
     gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
     if (s->repz_opt)                                                          \
-        gen_op_jz_ecx(s->aflag, l2);                                          \
+        gen_op_jz_ecx(s, s->aflag, l2);                                       \
     gen_jmp(s, cur_eip);                                                      \
 }
 
@@ -1447,27 +1453,27 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
     }
 
     tcg_gen_andi_tl(s->T1, s->T1, mask);
-    tcg_gen_subi_tl(cpu_tmp0, s->T1, 1);
+    tcg_gen_subi_tl(s->tmp0, s->T1, 1);
 
     if (is_right) {
         if (is_arith) {
             gen_exts(ot, s->T0);
-            tcg_gen_sar_tl(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
             tcg_gen_sar_tl(s->T0, s->T0, s->T1);
         } else {
             gen_extu(ot, s->T0);
-            tcg_gen_shr_tl(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
             tcg_gen_shr_tl(s->T0, s->T0, s->T1);
         }
     } else {
-        tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
+        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
         tcg_gen_shl_tl(s->T0, s->T0, s->T1);
     }
 
     /* store */
     gen_op_st_rm_T0_A0(s, ot, op1);
 
-    gen_shift_flags(s, ot, s->T0, cpu_tmp0, s->T1, is_right);
+    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
 }
 
 static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
@@ -1640,9 +1646,9 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
                 shift = mask + 1 - shift;
             }
             gen_extu(ot, s->T0);
-            tcg_gen_shli_tl(cpu_tmp0, s->T0, shift);
+            tcg_gen_shli_tl(s->tmp0, s->T0, shift);
             tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
-            tcg_gen_or_tl(s->T0, s->T0, cpu_tmp0);
+            tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
             break;
         }
     }
@@ -1751,9 +1757,9 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
            This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
            portion by constructing it as a 32-bit value.  */
         if (is_right) {
-            tcg_gen_deposit_tl(cpu_tmp0, s->T0, s->T1, 16, 16);
+            tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
             tcg_gen_mov_tl(s->T1, s->T0);
-            tcg_gen_mov_tl(s->T0, cpu_tmp0);
+            tcg_gen_mov_tl(s->T0, s->tmp0);
         } else {
             tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
         }
@@ -1761,35 +1767,35 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 #ifdef TARGET_X86_64
     case MO_32:
         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
-        tcg_gen_subi_tl(cpu_tmp0, count, 1);
+        tcg_gen_subi_tl(s->tmp0, count, 1);
         if (is_right) {
             tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
-            tcg_gen_shr_i64(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
             tcg_gen_shr_i64(s->T0, s->T0, count);
         } else {
             tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
-            tcg_gen_shl_i64(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
             tcg_gen_shl_i64(s->T0, s->T0, count);
-            tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
+            tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
             tcg_gen_shri_i64(s->T0, s->T0, 32);
         }
         break;
 #endif
     default:
-        tcg_gen_subi_tl(cpu_tmp0, count, 1);
+        tcg_gen_subi_tl(s->tmp0, count, 1);
         if (is_right) {
-            tcg_gen_shr_tl(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
 
             tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
             tcg_gen_shr_tl(s->T0, s->T0, count);
             tcg_gen_shl_tl(s->T1, s->T1, cpu_tmp4);
         } else {
-            tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
+            tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
             if (ot == MO_16) {
                 /* Only needed if count > 16, for Intel behaviour.  */
                 tcg_gen_subfi_tl(cpu_tmp4, 33, count);
                 tcg_gen_shr_tl(cpu_tmp4, s->T1, cpu_tmp4);
-                tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
+                tcg_gen_or_tl(s->tmp0, s->tmp0, cpu_tmp4);
             }
 
             tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
@@ -1806,7 +1812,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
     /* store */
     gen_op_st_rm_T0_A0(s, ot, op1);
 
-    gen_shift_flags(s, ot, s->T0, cpu_tmp0, count, is_right);
+    gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
     tcg_temp_free(count);
 }
 
@@ -2196,13 +2202,13 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
     if (use_goto_tb(s, pc))  {
         /* jump to same page: we can use a direct jump */
         tcg_gen_goto_tb(tb_num);
-        gen_jmp_im(eip);
+        gen_jmp_im(s, eip);
         tcg_gen_exit_tb(s->base.tb, tb_num);
         s->base.is_jmp = DISAS_NORETURN;
     } else {
         /* jump to another page */
-        gen_jmp_im(eip);
-        gen_jr(s, cpu_tmp0);
+        gen_jmp_im(s, eip);
+        gen_jr(s, s->tmp0);
     }
 }
 
@@ -2224,11 +2230,11 @@ static inline void gen_jcc(DisasContext *s, int b,
         l2 = gen_new_label();
         gen_jcc1(s, b, l1);
 
-        gen_jmp_im(next_eip);
+        gen_jmp_im(s, next_eip);
         tcg_gen_br(l2);
 
         gen_set_label(l1);
-        gen_jmp_im(val);
+        gen_jmp_im(s, val);
         gen_set_label(l2);
         gen_eob(s);
     }
@@ -2312,7 +2318,7 @@ gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
     if (likely(!(s->flags & HF_SVMI_MASK)))
         return;
     gen_update_cc_op(s);
-    gen_jmp_im(pc_start - s->cs_base);
+    gen_jmp_im(s, pc_start - s->cs_base);
     gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
                                          tcg_const_i64(param));
 }
@@ -2325,7 +2331,7 @@ gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
 
 static inline void gen_stack_update(DisasContext *s, int addend)
 {
-    gen_op_add_reg_im(mo_stacksize(s), R_ESP, addend);
+    gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
 }
 
 /* Generate a push. It depends on ss32, addseg and dflag.  */
@@ -2427,11 +2433,11 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
         for (i = 1; i < level; ++i) {
             tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
-            gen_op_ld_v(s, d_ot, cpu_tmp0, s->A0);
+            gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
 
             tcg_gen_subi_tl(s->A0, s->T1, size * i);
             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
-            gen_op_st_v(s, d_ot, cpu_tmp0, s->A0);
+            gen_op_st_v(s, d_ot, s->tmp0, s->A0);
         }
 
         /* Push the current FrameTemp as the last level.  */
@@ -2465,7 +2471,7 @@ static void gen_leave(DisasContext *s)
 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
 {
     gen_update_cc_op(s);
-    gen_jmp_im(cur_eip);
+    gen_jmp_im(s, cur_eip);
     gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
     s->base.is_jmp = DISAS_NORETURN;
 }
@@ -2502,7 +2508,7 @@ static void gen_interrupt(DisasContext *s, int intno,
                           target_ulong cur_eip, target_ulong next_eip)
 {
     gen_update_cc_op(s);
-    gen_jmp_im(cur_eip);
+    gen_jmp_im(s, cur_eip);
     gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
                                tcg_const_i32(next_eip - cur_eip));
     s->base.is_jmp = DISAS_NORETURN;
@@ -2511,7 +2517,7 @@ static void gen_interrupt(DisasContext *s, int intno,
 static void gen_debug(DisasContext *s, target_ulong cur_eip)
 {
     gen_update_cc_op(s);
-    gen_jmp_im(cur_eip);
+    gen_jmp_im(s, cur_eip);
     gen_helper_debug(cpu_env);
     s->base.is_jmp = DISAS_NORETURN;
 }
@@ -2621,7 +2627,7 @@ static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
     if (s->jmp_opt) {
         gen_goto_tb(s, tb_num, eip);
     } else {
-        gen_jmp_im(eip);
+        gen_jmp_im(s, eip);
         gen_eob(s);
     }
 }
@@ -2648,8 +2654,8 @@ static inline void gen_ldo_env_A0(DisasContext *s, int offset)
     int mem_index = s->mem_index;
     tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, mem_index, MO_LEQ);
     tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_addi_tl(cpu_tmp0, s->A0, 8);
-    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
+    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
+    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->tmp0, mem_index, MO_LEQ);
     tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
 }
 
@@ -2658,9 +2664,9 @@ static inline void gen_sto_env_A0(DisasContext *s, int offset)
     int mem_index = s->mem_index;
     tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
     tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, mem_index, MO_LEQ);
-    tcg_gen_addi_tl(cpu_tmp0, s->A0, 8);
+    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
     tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
-    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
+    tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->tmp0, mem_index, MO_LEQ);
 }
 
 static inline void gen_op_movo(int d_offset, int s_offset)
@@ -3713,9 +3719,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                         offsetof(ZMMReg, ZMM_L(0)));
                         break;
                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
-                        tcg_gen_qemu_ld_tl(cpu_tmp0, s->A0,
+                        tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
                                            s->mem_index, MO_LEUW);
-                        tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
+                        tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
                                         offsetof(ZMMReg, ZMM_W(0)));
                         break;
                     case 0x2a:            /* movntqda */
@@ -3999,7 +4005,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
                             gen_compute_eflags(s);
                         }
-                        carry_in = cpu_tmp0;
+                        carry_in = s->tmp0;
                         tcg_gen_extract_tl(carry_in, cpu_cc_src,
                                            ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
                     }
@@ -4902,8 +4908,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
                 gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-                tcg_gen_ext8s_tl(cpu_tmp0, s->T0);
-                tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
+                tcg_gen_ext8s_tl(s->tmp0, s->T0);
+                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
                 set_cc_op(s, CC_OP_MULB);
                 break;
             case MO_16:
@@ -4914,8 +4920,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
                 gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-                tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
-                tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
+                tcg_gen_ext16s_tl(s->tmp0, s->T0);
+                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
                 tcg_gen_shri_tl(s->T0, s->T0, 16);
                 gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
                 set_cc_op(s, CC_OP_MULW);
@@ -5215,8 +5221,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             /* XXX: use 32 bit mul which could be faster */
             tcg_gen_mul_tl(s->T0, s->T0, s->T1);
             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-            tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
-            tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
+            tcg_gen_ext16s_tl(s->tmp0, s->T0);
+            tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
             gen_op_mov_reg_v(ot, reg, s->T0);
             break;
         }
@@ -5423,7 +5429,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_pop_update(s, ot);
         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
         if (s->base.is_jmp) {
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             if (reg == R_SS) {
                 s->tf = 0;
                 gen_eob_inhibit_irq(s, true);
@@ -5438,7 +5444,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_movl_seg_T0(s, (b >> 3) & 7);
         gen_pop_update(s, ot);
         if (s->base.is_jmp) {
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
         }
         break;
@@ -5489,7 +5495,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_movl_seg_T0(s, reg);
         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
         if (s->base.is_jmp) {
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             if (reg == R_SS) {
                 s->tf = 0;
                 gen_eob_inhibit_irq(s, true);
@@ -5696,7 +5702,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         /* then put the data */
         gen_op_mov_reg_v(ot, reg, s->T1);
         if (s->base.is_jmp) {
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
         }
         break;
@@ -6478,7 +6484,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     do_lret:
         if (s->pe && !s->vm86) {
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
                                       tcg_const_i32(val));
         } else {
@@ -6691,7 +6697,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_pop_update(s, ot);
             set_cc_op(s, CC_OP_EFLAGS);
             /* abort translation because TF/AC flag may change */
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
         }
         break;
@@ -6779,9 +6785,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             AddressParts a = gen_lea_modrm_0(env, s, modrm);
             /* specific case: we need to add a displacement */
             gen_exts(ot, s->T1);
-            tcg_gen_sari_tl(cpu_tmp0, s->T1, 3 + ot);
-            tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
-            tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), cpu_tmp0);
+            tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
+            tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
+            tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
             gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
             if (!(s->prefix & PREFIX_LOCK)) {
                 gen_op_ld_v(s, ot, s->T0, s->A0);
@@ -6791,8 +6797,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         }
     bt_op:
         tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
-        tcg_gen_movi_tl(cpu_tmp0, 1);
-        tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, s->T1);
+        tcg_gen_movi_tl(s->tmp0, 1);
+        tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
         if (s->prefix & PREFIX_LOCK) {
             switch (op) {
             case 0: /* bt */
@@ -6801,17 +6807,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_op_ld_v(s, ot, s->T0, s->A0);
                 break;
             case 1: /* bts */
-                tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, cpu_tmp0,
+                tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
                                            s->mem_index, ot | MO_LE);
                 break;
             case 2: /* btr */
-                tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
-                tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, cpu_tmp0,
+                tcg_gen_not_tl(s->tmp0, s->tmp0);
+                tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
                                             s->mem_index, ot | MO_LE);
                 break;
             default:
             case 3: /* btc */
-                tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, cpu_tmp0,
+                tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
                                             s->mem_index, ot | MO_LE);
                 break;
             }
@@ -6823,14 +6829,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 /* Data already loaded; nothing to do.  */
                 break;
             case 1: /* bts */
-                tcg_gen_or_tl(s->T0, s->T0, cpu_tmp0);
+                tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
                 break;
             case 2: /* btr */
-                tcg_gen_andc_tl(s->T0, s->T0, cpu_tmp0);
+                tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
                 break;
             default:
             case 3: /* btc */
-                tcg_gen_xor_tl(s->T0, s->T0, cpu_tmp0);
+                tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
                 break;
             }
             if (op != 0) {
@@ -6983,7 +6989,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         }
         if (prefixes & PREFIX_REPZ) {
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
             s->base.is_jmp = DISAS_NORETURN;
         }
@@ -7011,7 +7017,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (CODE64(s))
             goto illegal_op;
         gen_update_cc_op(s);
-        gen_jmp_im(pc_start - s->cs_base);
+        gen_jmp_im(s, pc_start - s->cs_base);
         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
         break;
 #ifdef WANT_ICEBP
@@ -7045,7 +7051,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
             gen_helper_sti(cpu_env);
             /* interruptions are enabled only the first insn after sti */
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob_inhibit_irq(s, true);
         } else {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
@@ -7113,26 +7119,26 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             switch(b) {
             case 0: /* loopnz */
             case 1: /* loopz */
-                gen_op_add_reg_im(s->aflag, R_ECX, -1);
-                gen_op_jz_ecx(s->aflag, l3);
+                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
+                gen_op_jz_ecx(s, s->aflag, l3);
                 gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
                 break;
             case 2: /* loop */
-                gen_op_add_reg_im(s->aflag, R_ECX, -1);
-                gen_op_jnz_ecx(s->aflag, l1);
+                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
+                gen_op_jnz_ecx(s, s->aflag, l1);
                 break;
             default:
             case 3: /* jcxz */
-                gen_op_jz_ecx(s->aflag, l1);
+                gen_op_jz_ecx(s, s->aflag, l1);
                 break;
             }
 
             gen_set_label(l3);
-            gen_jmp_im(next_eip);
+            gen_jmp_im(s, next_eip);
             tcg_gen_br(l2);
 
             gen_set_label(l1);
-            gen_jmp_im(tval);
+            gen_jmp_im(s, tval);
             gen_set_label(l2);
             gen_eob(s);
         }
@@ -7143,7 +7149,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             if (b & 2) {
                 gen_helper_rdmsr(cpu_env);
             } else {
@@ -7153,7 +7159,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         break;
     case 0x131: /* rdtsc */
         gen_update_cc_op(s);
-        gen_jmp_im(pc_start - s->cs_base);
+        gen_jmp_im(s, pc_start - s->cs_base);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
@@ -7165,7 +7171,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         break;
     case 0x133: /* rdpmc */
         gen_update_cc_op(s);
-        gen_jmp_im(pc_start - s->cs_base);
+        gen_jmp_im(s, pc_start - s->cs_base);
         gen_helper_rdpmc(cpu_env);
         break;
     case 0x134: /* sysenter */
@@ -7194,7 +7200,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x105: /* syscall */
         /* XXX: is it usable in real mode ? */
         gen_update_cc_op(s);
-        gen_jmp_im(pc_start - s->cs_base);
+        gen_jmp_im(s, pc_start - s->cs_base);
         gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
         /* TF handling for the syscall insn is different. The TF bit is  checked
            after the syscall insn completes. This allows #DB to not be
@@ -7220,7 +7226,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 #endif
     case 0x1a2: /* cpuid */
         gen_update_cc_op(s);
-        gen_jmp_im(pc_start - s->cs_base);
+        gen_jmp_im(s, pc_start - s->cs_base);
         gen_helper_cpuid(cpu_env);
         break;
     case 0xf4: /* hlt */
@@ -7228,7 +7234,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
             s->base.is_jmp = DISAS_NORETURN;
         }
@@ -7320,7 +7326,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
             gen_extu(s->aflag, s->A0);
             gen_add_A0_ds_seg(s);
@@ -7332,7 +7338,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
             gen_eob(s);
             break;
@@ -7343,7 +7349,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             gen_helper_clac(cpu_env);
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -7353,7 +7359,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             gen_helper_stac(cpu_env);
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -7396,7 +7402,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
             gen_helper_xsetbv(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
             /* End TB because translation flags may change.  */
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -7409,7 +7415,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
                              tcg_const_i32(s->pc - pc_start));
             tcg_gen_exit_tb(NULL, 0);
@@ -7421,7 +7427,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_vmmcall(cpu_env);
             break;
 
@@ -7434,7 +7440,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
             break;
 
@@ -7447,7 +7453,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
             break;
 
@@ -7463,7 +7469,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             gen_update_cc_op(s);
             gen_helper_stgi(cpu_env);
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -7476,7 +7482,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_clgi(cpu_env);
             break;
 
@@ -7487,7 +7493,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_skinit(cpu_env);
             break;
 
@@ -7500,7 +7506,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
             break;
 
@@ -7574,7 +7580,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
             gen_helper_lmsw(cpu_env, s->T0);
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -7584,10 +7590,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_lea_modrm(env, s, modrm);
             gen_helper_invlpg(cpu_env, s->A0);
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -7613,7 +7619,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                 gen_io_start();
             }
@@ -7688,11 +7694,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 a0 = NULL;
             }
             gen_op_mov_v_reg(ot, t1, reg);
-            tcg_gen_andi_tl(cpu_tmp0, t0, 3);
+            tcg_gen_andi_tl(s->tmp0, t0, 3);
             tcg_gen_andi_tl(t1, t1, 3);
             tcg_gen_movi_tl(t2, 0);
             label1 = gen_new_label();
-            tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1);
+            tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
             tcg_gen_andi_tl(t0, t0, ~3);
             tcg_gen_or_tl(t0, t0, t1);
             tcg_gen_movi_tl(t2, CC_Z);
@@ -7729,9 +7735,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             } else {
                 gen_helper_lsl(t0, cpu_env, s->T0);
             }
-            tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
+            tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
             label1 = gen_new_label();
-            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
+            tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
             gen_op_mov_reg_v(ot, reg, t0);
             gen_set_label(label1);
             set_cc_op(s, CC_OP_EFLAGS);
@@ -7981,7 +7987,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             case 4:
             case 8:
                 gen_update_cc_op(s);
-                gen_jmp_im(pc_start - s->cs_base);
+                gen_jmp_im(s, pc_start - s->cs_base);
                 if (b & 2) {
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_start();
@@ -7992,7 +7998,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_end();
                     }
-                    gen_jmp_im(s->pc - s->cs_base);
+                    gen_jmp_im(s, s->pc - s->cs_base);
                     gen_eob(s);
                 } else {
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
@@ -8035,7 +8041,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_op_mov_v_reg(ot, s->T0, rm);
                 tcg_gen_movi_i32(cpu_tmp2_i32, reg);
                 gen_helper_set_dr(cpu_env, cpu_tmp2_i32, s->T0);
-                gen_jmp_im(s->pc - s->cs_base);
+                gen_jmp_im(s, s->pc - s->cs_base);
                 gen_eob(s);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
@@ -8052,7 +8058,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
             gen_helper_clts(cpu_env);
             /* abort block because static cpu state changed */
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
         }
         break;
@@ -8149,7 +8155,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             /* XRSTOR is how MPX is enabled, which changes how
                we translate.  Thus we need to end the TB.  */
             gen_update_cc_op(s);
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -8279,7 +8285,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (!(s->flags & HF_SMM_MASK))
             goto illegal_op;
         gen_update_cc_op(s);
-        gen_jmp_im(s->pc - s->cs_base);
+        gen_jmp_im(s, s->pc - s->cs_base);
         gen_helper_rsm(cpu_env);
         gen_eob(s);
         break;
@@ -8473,7 +8479,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
     dc->T1 = tcg_temp_new();
     dc->A0 = tcg_temp_new();
 
-    cpu_tmp0 = tcg_temp_new();
+    dc->tmp0 = tcg_temp_new();
     cpu_tmp1_i64 = tcg_temp_new_i64();
     cpu_tmp2_i32 = tcg_temp_new_i32();
     cpu_tmp3_i32 = tcg_temp_new_i32();
@@ -8550,7 +8556,7 @@ static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
     DisasContext *dc = container_of(dcbase, DisasContext, base);
 
     if (dc->base.is_jmp == DISAS_TOO_MANY) {
-        gen_jmp_im(dc->base.pc_next - dc->cs_base);
+        gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
         gen_eob(dc);
     }
 }
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 06/13] target/i386: move cpu_tmp4 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (4 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 05/13] target/i386: move cpu_tmp0 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:52   ` Richard Henderson
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 07/13] target/i386: move cpu_ptr0 " Emilio G. Cota
                   ` (7 subsequent siblings)
  13 siblings, 1 reply; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 78 ++++++++++++++++++++---------------------
 1 file changed, 39 insertions(+), 39 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 873231fb44..0ad6ffc4af 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -79,7 +79,6 @@ static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
 
-static TCGv cpu_tmp4;
 static TCGv_ptr cpu_ptr0, cpu_ptr1;
 static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;
@@ -141,6 +140,7 @@ typedef struct DisasContext {
 
     /* TCG local register indexes (only used inside old micro ops) */
     TCGv tmp0;
+    TCGv tmp4;
 
     sigjmp_buf jmpbuf;
 } DisasContext;
@@ -909,10 +909,10 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         size = s->cc_op - CC_OP_SUBB;
         switch (jcc_op) {
         case JCC_BE:
-            tcg_gen_mov_tl(cpu_tmp4, s->cc_srcT);
-            gen_extu(size, cpu_tmp4);
+            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
+            gen_extu(size, s->tmp4);
             t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
-            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
+            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
                                .reg2 = t0, .mask = -1, .use_reg2 = true };
             break;
 
@@ -922,10 +922,10 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         case JCC_LE:
             cond = TCG_COND_LE;
         fast_jcc_l:
-            tcg_gen_mov_tl(cpu_tmp4, s->cc_srcT);
-            gen_exts(size, cpu_tmp4);
+            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
+            gen_exts(size, s->tmp4);
             t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
-            cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
+            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
                                .reg2 = t0, .mask = -1, .use_reg2 = true };
             break;
 
@@ -1277,32 +1277,32 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
     }
     switch(op) {
     case OP_ADCL:
-        gen_compute_eflags_c(s1, cpu_tmp4);
+        gen_compute_eflags_c(s1, s1->tmp4);
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_add_tl(s1->T0, cpu_tmp4, s1->T1);
+            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
             tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                         s1->mem_index, ot | MO_LE);
         } else {
             tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
-            tcg_gen_add_tl(s1->T0, s1->T0, cpu_tmp4);
+            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update3_cc(s1, cpu_tmp4);
+        gen_op_update3_cc(s1, s1->tmp4);
         set_cc_op(s1, CC_OP_ADCB + ot);
         break;
     case OP_SBBL:
-        gen_compute_eflags_c(s1, cpu_tmp4);
+        gen_compute_eflags_c(s1, s1->tmp4);
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_add_tl(s1->T0, s1->T1, cpu_tmp4);
+            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
             tcg_gen_neg_tl(s1->T0, s1->T0);
             tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                         s1->mem_index, ot | MO_LE);
         } else {
             tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
-            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_tmp4);
+            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update3_cc(s1, cpu_tmp4);
+        gen_op_update3_cc(s1, s1->tmp4);
         set_cc_op(s1, CC_OP_SBBB + ot);
         break;
     case OP_ADDL:
@@ -1492,15 +1492,15 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
         if (is_right) {
             if (is_arith) {
                 gen_exts(ot, s->T0);
-                tcg_gen_sari_tl(cpu_tmp4, s->T0, op2 - 1);
+                tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
                 tcg_gen_sari_tl(s->T0, s->T0, op2);
             } else {
                 gen_extu(ot, s->T0);
-                tcg_gen_shri_tl(cpu_tmp4, s->T0, op2 - 1);
+                tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
                 tcg_gen_shri_tl(s->T0, s->T0, op2);
             }
         } else {
-            tcg_gen_shli_tl(cpu_tmp4, s->T0, op2 - 1);
+            tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
             tcg_gen_shli_tl(s->T0, s->T0, op2);
         }
     }
@@ -1510,7 +1510,7 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
 
     /* update eflags if non zero shift */
     if (op2 != 0) {
-        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
+        tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
         tcg_gen_mov_tl(cpu_cc_dst, s->T0);
         set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
     }
@@ -1786,25 +1786,25 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
         if (is_right) {
             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
 
-            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
+            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
             tcg_gen_shr_tl(s->T0, s->T0, count);
-            tcg_gen_shl_tl(s->T1, s->T1, cpu_tmp4);
+            tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
         } else {
             tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
             if (ot == MO_16) {
                 /* Only needed if count > 16, for Intel behaviour.  */
-                tcg_gen_subfi_tl(cpu_tmp4, 33, count);
-                tcg_gen_shr_tl(cpu_tmp4, s->T1, cpu_tmp4);
-                tcg_gen_or_tl(s->tmp0, s->tmp0, cpu_tmp4);
+                tcg_gen_subfi_tl(s->tmp4, 33, count);
+                tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
+                tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
             }
 
-            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
+            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
             tcg_gen_shl_tl(s->T0, s->T0, count);
-            tcg_gen_shr_tl(s->T1, s->T1, cpu_tmp4);
+            tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
         }
-        tcg_gen_movi_tl(cpu_tmp4, 0);
-        tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, cpu_tmp4,
-                           cpu_tmp4, s->T1);
+        tcg_gen_movi_tl(s->tmp4, 0);
+        tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
+                           s->tmp4, s->T1);
         tcg_gen_or_tl(s->T0, s->T0, s->T1);
         break;
     }
@@ -2346,7 +2346,7 @@ static void gen_push_v(DisasContext *s, TCGv val)
 
     if (!CODE64(s)) {
         if (s->addseg) {
-            new_esp = cpu_tmp4;
+            new_esp = s->tmp4;
             tcg_gen_mov_tl(new_esp, s->A0);
         }
         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
@@ -5068,8 +5068,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                                       tcg_const_i32(dflag - 1),
                                       tcg_const_i32(s->pc - s->cs_base));
             }
-            tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
-            gen_jr(s, cpu_tmp4);
+            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
+            gen_jr(s, s->tmp4);
             break;
         case 4: /* jmp Ev */
             if (dflag == MO_16) {
@@ -5092,8 +5092,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_op_movl_seg_T0_vm(s, R_CS);
                 gen_op_jmp_v(s->T1);
             }
-            tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
-            gen_jr(s, cpu_tmp4);
+            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
+            gen_jr(s, s->tmp4);
             break;
         case 6: /* push Ev */
             gen_push_v(s, s->T0);
@@ -6821,9 +6821,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                                             s->mem_index, ot | MO_LE);
                 break;
             }
-            tcg_gen_shr_tl(cpu_tmp4, s->T0, s->T1);
+            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
         } else {
-            tcg_gen_shr_tl(cpu_tmp4, s->T0, s->T1);
+            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
             switch (op) {
             case 0: /* bt */
                 /* Data already loaded; nothing to do.  */
@@ -6867,13 +6867,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                We can get that same Z value (and the new C value) by leaving
                CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
                same width.  */
-            tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
+            tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
             set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
             break;
         default:
             /* Otherwise, generate EFLAGS and replace the C bit.  */
             gen_compute_eflags(s);
-            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4,
+            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
                                ctz32(CC_C), 1);
             break;
         }
@@ -8483,7 +8483,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
     cpu_tmp1_i64 = tcg_temp_new_i64();
     cpu_tmp2_i32 = tcg_temp_new_i32();
     cpu_tmp3_i32 = tcg_temp_new_i32();
-    cpu_tmp4 = tcg_temp_new();
+    dc->tmp4 = tcg_temp_new();
     cpu_ptr0 = tcg_temp_new_ptr();
     cpu_ptr1 = tcg_temp_new_ptr();
     dc->cc_srcT = tcg_temp_local_new();
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 07/13] target/i386: move cpu_ptr0 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (5 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 06/13] target/i386: move cpu_tmp4 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:53   ` Richard Henderson
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 08/13] target/i386: move cpu_ptr1 " Emilio G. Cota
                   ` (6 subsequent siblings)
  13 siblings, 1 reply; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 101 +++++++++++++++++++++-------------------
 1 file changed, 52 insertions(+), 49 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 0ad6ffc4af..9531dafebe 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -79,7 +79,7 @@ static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
 
-static TCGv_ptr cpu_ptr0, cpu_ptr1;
+static TCGv_ptr cpu_ptr1;
 static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;
 
@@ -141,6 +141,7 @@ typedef struct DisasContext {
     /* TCG local register indexes (only used inside old micro ops) */
     TCGv tmp0;
     TCGv tmp4;
+    TCGv_ptr ptr0;
 
     sigjmp_buf jmpbuf;
 } DisasContext;
@@ -3147,27 +3148,27 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
 #endif
             {
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
+                gen_helper_movl_mm_T0_mmx(s->ptr0, cpu_tmp2_i32);
             }
             break;
         case 0x16e: /* movd xmm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
-                gen_helper_movq_mm_T0_xmm(cpu_ptr0, s->T0);
+                gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
             } else
 #endif
             {
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
+                gen_helper_movl_mm_T0_xmm(s->ptr0, cpu_tmp2_i32);
             }
             break;
         case 0x6f: /* movq mm, ea */
@@ -3312,14 +3313,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 field_length = x86_ldub_code(env, s) & 0x3F;
                 bit_index = x86_ldub_code(env, s) & 0x3F;
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                     offsetof(CPUX86State,xmm_regs[reg]));
                 if (b1 == 1)
-                    gen_helper_extrq_i(cpu_env, cpu_ptr0,
+                    gen_helper_extrq_i(cpu_env, s->ptr0,
                                        tcg_const_i32(bit_index),
                                        tcg_const_i32(field_length));
                 else
-                    gen_helper_insertq_i(cpu_env, cpu_ptr0,
+                    gen_helper_insertq_i(cpu_env, s->ptr0,
                                          tcg_const_i32(bit_index),
                                          tcg_const_i32(field_length));
             }
@@ -3471,22 +3472,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 rm = (modrm & 7);
                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
             }
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
-            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
             break;
         case 0x050: /* movmskps */
             rm = (modrm & 7) | REX_B(s);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+            tcg_gen_addi_ptr(s->ptr0, cpu_env,
                              offsetof(CPUX86State,xmm_regs[rm]));
-            gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0);
+            gen_helper_movmskps(cpu_tmp2_i32, cpu_env, s->ptr0);
             tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
             break;
         case 0x150: /* movmskpd */
             rm = (modrm & 7) | REX_B(s);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+            tcg_gen_addi_ptr(s->ptr0, cpu_env,
                              offsetof(CPUX86State,xmm_regs[rm]));
-            gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0);
+            gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, s->ptr0);
             tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
             break;
         case 0x02a: /* cvtpi2ps */
@@ -3501,15 +3502,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
             }
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
             switch(b >> 8) {
             case 0x0:
-                gen_helper_cvtpi2ps(cpu_env, cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtpi2ps(cpu_env, s->ptr0, cpu_ptr1);
                 break;
             default:
             case 0x1:
-                gen_helper_cvtpi2pd(cpu_env, cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtpi2pd(cpu_env, s->ptr0, cpu_ptr1);
                 break;
             }
             break;
@@ -3518,15 +3519,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             ot = mo_64_32(s->dflag);
             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             if (ot == MO_32) {
                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
+                sse_fn_epi(cpu_env, s->ptr0, cpu_tmp2_i32);
             } else {
 #ifdef TARGET_X86_64
                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
-                sse_fn_epl(cpu_env, cpu_ptr0, s->T0);
+                sse_fn_epl(cpu_env, s->ptr0, s->T0);
 #else
                 goto illegal_op;
 #endif
@@ -3546,20 +3547,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
             }
             op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
             switch(b) {
             case 0x02c:
-                gen_helper_cvttps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
+                gen_helper_cvttps2pi(cpu_env, s->ptr0, cpu_ptr1);
                 break;
             case 0x12c:
-                gen_helper_cvttpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
+                gen_helper_cvttpd2pi(cpu_env, s->ptr0, cpu_ptr1);
                 break;
             case 0x02d:
-                gen_helper_cvtps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtps2pi(cpu_env, s->ptr0, cpu_ptr1);
                 break;
             case 0x12d:
-                gen_helper_cvtpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtpd2pi(cpu_env, s->ptr0, cpu_ptr1);
                 break;
             }
             break;
@@ -3582,17 +3583,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 rm = (modrm & 7) | REX_B(s);
                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
             }
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
             if (ot == MO_32) {
                 SSEFunc_i_ep sse_fn_i_ep =
                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
-                sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
+                sse_fn_i_ep(cpu_tmp2_i32, cpu_env, s->ptr0);
                 tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
             } else {
 #ifdef TARGET_X86_64
                 SSEFunc_l_ep sse_fn_l_ep =
                     sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
-                sse_fn_l_ep(s->T0, cpu_env, cpu_ptr0);
+                sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
 #else
                 goto illegal_op;
 #endif
@@ -3665,12 +3666,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 goto illegal_op;
             if (b1) {
                 rm = (modrm & 7) | REX_B(s);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
-                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, cpu_ptr0);
+                tcg_gen_addi_ptr(s->ptr0, cpu_env,
+                                 offsetof(CPUX86State, xmm_regs[rm]));
+                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, s->ptr0);
             } else {
                 rm = (modrm & 7);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
-                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, cpu_ptr0);
+                tcg_gen_addi_ptr(s->ptr0, cpu_env,
+                                 offsetof(CPUX86State, fpregs[rm].mmx));
+                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, s->ptr0);
             }
             reg = ((modrm >> 3) & 7) | rex_r;
             tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
@@ -3745,9 +3748,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 goto unknown_op;
             }
 
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
 
             if (b == 0x17) {
                 set_cc_op(s, CC_OP_EFLAGS);
@@ -4294,9 +4297,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
             }
 
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
+            sse_fn_eppi(cpu_env, s->ptr0, cpu_ptr1, tcg_const_i32(val));
             break;
 
         case 0x33a:
@@ -4417,18 +4420,18 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
                 goto illegal_op;
             }
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
             break;
         case 0x70: /* pshufx insn */
         case 0xc6: /* pshufx insn */
             val = x86_ldub_code(env, s);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
             /* XXX: introduce a new table? */
             sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
-            sse_fn_ppi(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
+            sse_fn_ppi(s->ptr0, cpu_ptr1, tcg_const_i32(val));
             break;
         case 0xc2:
             /* compare insns */
@@ -4437,9 +4440,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 goto unknown_op;
             sse_fn_epp = sse_op_table4[val][b1];
 
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
             break;
         case 0xf7:
             /* maskmov : we must prepare A0 */
@@ -4449,16 +4452,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             gen_extu(s->aflag, s->A0);
             gen_add_A0_ds_seg(s);
 
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
             /* XXX: introduce a new table? */
             sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
-            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, s->A0);
+            sse_fn_eppt(cpu_env, s->ptr0, cpu_ptr1, s->A0);
             break;
         default:
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
             break;
         }
         if (b == 0x2e || b == 0x2f) {
@@ -8484,7 +8487,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
     cpu_tmp2_i32 = tcg_temp_new_i32();
     cpu_tmp3_i32 = tcg_temp_new_i32();
     dc->tmp4 = tcg_temp_new();
-    cpu_ptr0 = tcg_temp_new_ptr();
+    dc->ptr0 = tcg_temp_new_ptr();
     cpu_ptr1 = tcg_temp_new_ptr();
     dc->cc_srcT = tcg_temp_local_new();
 }
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 08/13] target/i386: move cpu_ptr1 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (6 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 07/13] target/i386: move cpu_ptr0 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:54   ` Richard Henderson
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 09/13] target/i386: move cpu_tmp2_i32 " Emilio G. Cota
                   ` (5 subsequent siblings)
  13 siblings, 1 reply; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 52 ++++++++++++++++++++---------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 9531dafebe..c51f61ca2c 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -79,7 +79,6 @@ static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
 
-static TCGv_ptr cpu_ptr1;
 static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;
 
@@ -142,6 +141,7 @@ typedef struct DisasContext {
     TCGv tmp0;
     TCGv tmp4;
     TCGv_ptr ptr0;
+    TCGv_ptr ptr1;
 
     sigjmp_buf jmpbuf;
 } DisasContext;
@@ -3473,8 +3473,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
             }
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
-            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
+            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
             break;
         case 0x050: /* movmskps */
             rm = (modrm & 7) | REX_B(s);
@@ -3503,14 +3503,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             }
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
             switch(b >> 8) {
             case 0x0:
-                gen_helper_cvtpi2ps(cpu_env, s->ptr0, cpu_ptr1);
+                gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
                 break;
             default:
             case 0x1:
-                gen_helper_cvtpi2pd(cpu_env, s->ptr0, cpu_ptr1);
+                gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
                 break;
             }
             break;
@@ -3548,19 +3548,19 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             }
             op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
             switch(b) {
             case 0x02c:
-                gen_helper_cvttps2pi(cpu_env, s->ptr0, cpu_ptr1);
+                gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
                 break;
             case 0x12c:
-                gen_helper_cvttpd2pi(cpu_env, s->ptr0, cpu_ptr1);
+                gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
                 break;
             case 0x02d:
-                gen_helper_cvtps2pi(cpu_env, s->ptr0, cpu_ptr1);
+                gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
                 break;
             case 0x12d:
-                gen_helper_cvtpd2pi(cpu_env, s->ptr0, cpu_ptr1);
+                gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
                 break;
             }
             break;
@@ -3749,8 +3749,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             }
 
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
+            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
 
             if (b == 0x17) {
                 set_cc_op(s, CC_OP_EFLAGS);
@@ -4298,8 +4298,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             }
 
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_eppi(cpu_env, s->ptr0, cpu_ptr1, tcg_const_i32(val));
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
+            sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
             break;
 
         case 0x33a:
@@ -4421,17 +4421,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 goto illegal_op;
             }
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
+            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
             break;
         case 0x70: /* pshufx insn */
         case 0xc6: /* pshufx insn */
             val = x86_ldub_code(env, s);
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
             /* XXX: introduce a new table? */
             sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
-            sse_fn_ppi(s->ptr0, cpu_ptr1, tcg_const_i32(val));
+            sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
             break;
         case 0xc2:
             /* compare insns */
@@ -4441,8 +4441,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             sse_fn_epp = sse_op_table4[val][b1];
 
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
+            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
             break;
         case 0xf7:
             /* maskmov : we must prepare A0 */
@@ -4453,15 +4453,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             gen_add_A0_ds_seg(s);
 
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
             /* XXX: introduce a new table? */
             sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
-            sse_fn_eppt(cpu_env, s->ptr0, cpu_ptr1, s->A0);
+            sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
             break;
         default:
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, s->ptr0, cpu_ptr1);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
+            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
             break;
         }
         if (b == 0x2e || b == 0x2f) {
@@ -8488,7 +8488,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
     cpu_tmp3_i32 = tcg_temp_new_i32();
     dc->tmp4 = tcg_temp_new();
     dc->ptr0 = tcg_temp_new_ptr();
-    cpu_ptr1 = tcg_temp_new_ptr();
+    dc->ptr1 = tcg_temp_new_ptr();
     dc->cc_srcT = tcg_temp_local_new();
 }
 
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 09/13] target/i386: move cpu_tmp2_i32 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (7 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 08/13] target/i386: move cpu_ptr1 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:55   ` Richard Henderson
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 10/13] target/i386: move cpu_tmp3_i32 " Emilio G. Cota
                   ` (4 subsequent siblings)
  13 siblings, 1 reply; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 347 ++++++++++++++++++++--------------------
 1 file changed, 174 insertions(+), 173 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index c51f61ca2c..ec68f7dba1 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -79,7 +79,7 @@ static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
 
-static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
+static TCGv_i32 cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;
 
 #include "exec/gen-icount.h"
@@ -142,6 +142,7 @@ typedef struct DisasContext {
     TCGv tmp4;
     TCGv_ptr ptr0;
     TCGv_ptr ptr1;
+    TCGv_i32 tmp2_i32;
 
     sigjmp_buf jmpbuf;
 } DisasContext;
@@ -617,16 +618,16 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
     target_ulong next_eip;
 
     if (s->pe && (s->cpl > s->iopl || s->vm86)) {
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
         switch (ot) {
         case MO_8:
-            gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
+            gen_helper_check_iob(cpu_env, s->tmp2_i32);
             break;
         case MO_16:
-            gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
+            gen_helper_check_iow(cpu_env, s->tmp2_i32);
             break;
         case MO_32:
-            gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
+            gen_helper_check_iol(cpu_env, s->tmp2_i32);
             break;
         default:
             tcg_abort();
@@ -637,8 +638,8 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
         gen_jmp_im(s, cur_eip);
         svm_flags |= (1 << (4 + ot));
         next_eip = s->pc - s->cs_base;
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-        gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+        gen_helper_svm_check_io(cpu_env, s->tmp2_i32,
                                 tcg_const_i32(svm_flags),
                                 tcg_const_i32(next_eip - cur_eip));
     }
@@ -1136,13 +1137,13 @@ static inline void gen_ins(DisasContext *s, TCGMemOp ot)
        case of page fault. */
     tcg_gen_movi_tl(s->T0, 0);
     gen_op_st_v(s, ot, s->T0, s->A0);
-    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
-    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
-    gen_helper_in_func(ot, s->T0, cpu_tmp2_i32);
+    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
+    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
+    gen_helper_in_func(ot, s->T0, s->tmp2_i32);
     gen_op_st_v(s, ot, s->T0, s->A0);
     gen_op_movl_T0_Dshift(s, ot);
     gen_op_add_reg_T0(s, s->aflag, R_EDI);
-    gen_bpt_io(s, cpu_tmp2_i32, ot);
+    gen_bpt_io(s, s->tmp2_i32, ot);
     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
         gen_io_end();
     }
@@ -1156,13 +1157,13 @@ static inline void gen_outs(DisasContext *s, TCGMemOp ot)
     gen_string_movl_A0_ESI(s);
     gen_op_ld_v(s, ot, s->T0, s->A0);
 
-    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
-    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
+    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
+    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
     tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T0);
-    gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
+    gen_helper_out_func(ot, s->tmp2_i32, cpu_tmp3_i32);
     gen_op_movl_T0_Dshift(s, ot);
     gen_op_add_reg_T0(s, s->aflag, R_ESI);
-    gen_bpt_io(s, cpu_tmp2_i32, ot);
+    gen_bpt_io(s, s->tmp2_i32, ot);
     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
         gen_io_end();
     }
@@ -1421,7 +1422,7 @@ static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
     tcg_temp_free(z_tl);
 
     /* Get the two potential CC_OP values into temporaries.  */
-    tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
+    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
     if (s->cc_op == CC_OP_DYNAMIC) {
         oldop = cpu_cc_op;
     } else {
@@ -1433,7 +1434,7 @@ static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
     z32 = tcg_const_i32(0);
     s32 = tcg_temp_new_i32();
     tcg_gen_trunc_tl_i32(s32, count);
-    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
+    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
     tcg_temp_free_i32(z32);
     tcg_temp_free_i32(s32);
 
@@ -1544,14 +1545,14 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
     do_long:
 #ifdef TARGET_X86_64
     case MO_32:
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
         if (is_right) {
-            tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, cpu_tmp3_i32);
         } else {
-            tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, cpu_tmp3_i32);
         }
-        tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
+        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
         break;
 #endif
     default:
@@ -1591,10 +1592,10 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
     t0 = tcg_const_i32(0);
     t1 = tcg_temp_new_i32();
     tcg_gen_trunc_tl_i32(t1, s->T1);
-    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX); 
+    tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
     tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
-                        cpu_tmp2_i32, cpu_tmp3_i32);
+                        s->tmp2_i32, cpu_tmp3_i32);
     tcg_temp_free_i32(t0);
     tcg_temp_free_i32(t1);
 
@@ -1620,13 +1621,13 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
         switch (ot) {
 #ifdef TARGET_X86_64
         case MO_32:
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
+            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
             if (is_right) {
-                tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
+                tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
             } else {
-                tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
+                tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
             }
-            tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
+            tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
             break;
 #endif
         default:
@@ -2111,8 +2112,8 @@ static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
         tcg_gen_ext32u_i64(cpu_tmp1_i64, cpu_tmp1_i64);
     }
     tcg_gen_setcond_i64(cond, cpu_tmp1_i64, cpu_tmp1_i64, bndv);
-    tcg_gen_extrl_i64_i32(cpu_tmp2_i32, cpu_tmp1_i64);
-    gen_helper_bndck(cpu_env, cpu_tmp2_i32);
+    tcg_gen_extrl_i64_i32(s->tmp2_i32, cpu_tmp1_i64);
+    gen_helper_bndck(cpu_env, s->tmp2_i32);
 }
 
 /* used for LEA and MOV AX, mem */
@@ -2289,8 +2290,8 @@ static inline void gen_op_movl_seg_T0_vm(DisasContext *s, int seg_reg)
 static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
 {
     if (s->pe && !s->vm86) {
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
         /* abort translation because the addseg value may change or
            because ss32 may change. For R_SS, translation must always
            stop as a special handling must be done to disable hardware
@@ -2684,10 +2685,10 @@ static inline void gen_op_movq(int d_offset, int s_offset)
     tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
 }
 
-static inline void gen_op_movl(int d_offset, int s_offset)
+static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
 {
-    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
-    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
+    tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
+    tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
 }
 
 static inline void gen_op_movq_env_0(int d_offset)
@@ -3150,8 +3151,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx));
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_movl_mm_T0_mmx(s->ptr0, cpu_tmp2_i32);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
             }
             break;
         case 0x16e: /* movd xmm, ea */
@@ -3167,8 +3168,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_movl_mm_T0_xmm(s->ptr0, cpu_tmp2_i32);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
             }
             break;
         case 0x6f: /* movq mm, ea */
@@ -3213,7 +3214,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
+                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
             }
             break;
@@ -3252,14 +3253,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
+                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
+                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
             }
-            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
+            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
-            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
+            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
             break;
         case 0x312: /* movddup */
@@ -3294,14 +3295,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
+                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
+                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
             }
-            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
+            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
-            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
+            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
             break;
         case 0x178:
@@ -3398,7 +3399,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_op_st_v(s, MO_32, s->T0, s->A0);
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
+                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
                             offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
             }
             break;
@@ -3480,15 +3481,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             rm = (modrm & 7) | REX_B(s);
             tcg_gen_addi_ptr(s->ptr0, cpu_env,
                              offsetof(CPUX86State,xmm_regs[rm]));
-            gen_helper_movmskps(cpu_tmp2_i32, cpu_env, s->ptr0);
-            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
+            gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
+            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
             break;
         case 0x150: /* movmskpd */
             rm = (modrm & 7) | REX_B(s);
             tcg_gen_addi_ptr(s->ptr0, cpu_env,
                              offsetof(CPUX86State,xmm_regs[rm]));
-            gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, s->ptr0);
-            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
+            gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
+            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
             break;
         case 0x02a: /* cvtpi2ps */
         case 0x12a: /* cvtpi2pd */
@@ -3522,8 +3523,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             if (ot == MO_32) {
                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                sse_fn_epi(cpu_env, s->ptr0, cpu_tmp2_i32);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
             } else {
 #ifdef TARGET_X86_64
                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
@@ -3587,8 +3588,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (ot == MO_32) {
                 SSEFunc_i_ep sse_fn_i_ep =
                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
-                sse_fn_i_ep(cpu_tmp2_i32, cpu_env, s->ptr0);
-                tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
+                sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
+                tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
             } else {
 #ifdef TARGET_X86_64
                 SSEFunc_l_ep sse_fn_l_ep =
@@ -3668,15 +3669,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 rm = (modrm & 7) | REX_B(s);
                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                  offsetof(CPUX86State, xmm_regs[rm]));
-                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, s->ptr0);
+                gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
             } else {
                 rm = (modrm & 7);
                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                  offsetof(CPUX86State, fpregs[rm].mmx));
-                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, s->ptr0);
+                gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
             }
             reg = ((modrm >> 3) & 7) | rex_r;
-            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
+            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
             break;
 
         case 0x138:
@@ -3716,9 +3717,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         break;
                     case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
                     case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
-                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
+                        tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
                                         offsetof(ZMMReg, ZMM_L(0)));
                         break;
                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
@@ -3780,9 +3781,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     ot = MO_64;
                 }
 
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-                gen_helper_crc32(s->T0, cpu_tmp2_i32,
+                gen_helper_crc32(s->T0, s->tmp2_i32,
                                  s->T0, tcg_const_i32(8 << ot));
 
                 ot = mo_64_32(s->dflag);
@@ -3910,11 +3911,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 switch (ot) {
                 default:
-                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
+                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                     tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
-                    tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
-                                      cpu_tmp2_i32, cpu_tmp3_i32);
-                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], cpu_tmp2_i32);
+                    tcg_gen_mulu2_i32(s->tmp2_i32, cpu_tmp3_i32,
+                                      s->tmp2_i32, cpu_tmp3_i32);
+                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
                     tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
                     break;
 #ifdef TARGET_X86_64
@@ -4162,13 +4163,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     break;
                 case 0x16:
                     if (ot == MO_32) { /* pextrd */
-                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
+                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
                                         offsetof(CPUX86State,
                                                 xmm_regs[reg].ZMM_L(val & 3)));
                         if (mod == 3) {
-                            tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32);
+                            tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
                         } else {
-                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
+                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                                 s->mem_index, MO_LEUL);
                         }
                     } else { /* pextrq */
@@ -4209,14 +4210,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     break;
                 case 0x21: /* insertps */
                     if (mod == 3) {
-                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
+                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
                                         offsetof(CPUX86State,xmm_regs[rm]
                                                 .ZMM_L((val >> 6) & 3)));
                     } else {
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                     }
-                    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
+                    tcg_gen_st_i32(s->tmp2_i32, cpu_env,
                                     offsetof(CPUX86State,xmm_regs[reg]
                                             .ZMM_L((val >> 4) & 3)));
                     if ((val >> 0) & 1)
@@ -4239,12 +4240,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 case 0x22:
                     if (ot == MO_32) { /* pinsrd */
                         if (mod == 3) {
-                            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[rm]);
+                            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
                         } else {
-                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                                 s->mem_index, MO_LEUL);
                         }
-                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
+                        tcg_gen_st_i32(s->tmp2_i32, cpu_env,
                                         offsetof(CPUX86State,
                                                 xmm_regs[reg].ZMM_L(val & 3)));
                     } else { /* pinsrq */
@@ -4321,9 +4322,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 if (ot == MO_64) {
                     tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
                 } else {
-                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                    tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
-                    tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
+                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                    tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
+                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
                 }
                 gen_op_mov_reg_v(ot, reg, s->T0);
                 break;
@@ -4880,11 +4881,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             default:
             case MO_32:
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
-                tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
-                                  cpu_tmp2_i32, cpu_tmp3_i32);
-                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
+                tcg_gen_mulu2_i32(s->tmp2_i32, cpu_tmp3_i32,
+                                  s->tmp2_i32, cpu_tmp3_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
@@ -4931,16 +4932,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             default:
             case MO_32:
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
-                tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
-                                  cpu_tmp2_i32, cpu_tmp3_i32);
-                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
+                tcg_gen_muls2_i32(s->tmp2_i32, cpu_tmp3_i32,
+                                  s->tmp2_i32, cpu_tmp3_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
-                tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
+                tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
-                tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
-                tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
+                tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, cpu_tmp3_i32);
+                tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
                 set_cc_op(s, CC_OP_MULL);
                 break;
 #ifdef TARGET_X86_64
@@ -5061,13 +5062,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_op_ld_v(s, MO_16, s->T0, s->A0);
         do_lcall:
             if (s->pe && !s->vm86) {
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, s->T1,
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
                                            tcg_const_i32(dflag - 1),
                                            tcg_const_tl(s->pc - s->cs_base));
             } else {
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, s->T1,
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
                                       tcg_const_i32(dflag - 1),
                                       tcg_const_i32(s->pc - s->cs_base));
             }
@@ -5088,8 +5089,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_op_ld_v(s, MO_16, s->T0, s->A0);
         do_ljmp:
             if (s->pe && !s->vm86) {
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, s->T1,
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
                                           tcg_const_tl(s->pc - s->cs_base));
             } else {
                 gen_op_movl_seg_T0_vm(s, R_CS);
@@ -5208,15 +5209,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             break;
 #endif
         case MO_32:
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
+            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
             tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
-            tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
-                              cpu_tmp2_i32, cpu_tmp3_i32);
-            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
-            tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
+            tcg_gen_muls2_i32(s->tmp2_i32, cpu_tmp3_i32,
+                              s->tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
+            tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
-            tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
-            tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
+            tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
             break;
         default:
             tcg_gen_ext16s_tl(s->T0, s->T0);
@@ -5820,14 +5821,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 
                     switch(op >> 4) {
                     case 0:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
-                        gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32);
+                        gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
                         break;
                     case 1:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
-                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
+                        gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
                         break;
                     case 2:
                         tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
@@ -5836,9 +5837,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                         break;
                     case 3:
                     default:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LESW);
-                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
+                        gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
                         break;
                     }
 
@@ -5859,14 +5860,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 case 0:
                     switch(op >> 4) {
                     case 0:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
-                        gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32);
+                        gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
                         break;
                     case 1:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
-                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
+                        gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
                         break;
                     case 2:
                         tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
@@ -5875,9 +5876,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                         break;
                     case 3:
                     default:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LESW);
-                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
+                        gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
                         break;
                     }
                     break;
@@ -5885,8 +5886,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     /* XXX: the corresponding CPUID bit must be tested ! */
                     switch(op >> 4) {
                     case 1:
-                        gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
+                        gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
+                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         break;
                     case 2:
@@ -5896,8 +5897,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                         break;
                     case 3:
                     default:
-                        gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
+                        gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
+                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUW);
                         break;
                     }
@@ -5906,13 +5907,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 default:
                     switch(op >> 4) {
                     case 0:
-                        gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
+                        gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
+                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         break;
                     case 1:
-                        gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
+                        gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
+                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         break;
                     case 2:
@@ -5922,8 +5923,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                         break;
                     case 3:
                     default:
-                        gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
+                        gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
+                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUW);
                         break;
                     }
@@ -5936,16 +5937,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x0d: /* fldcw mem */
-                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
+                tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                     s->mem_index, MO_LEUW);
-                gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
+                gen_helper_fldcw(cpu_env, s->tmp2_i32);
                 break;
             case 0x0e: /* fnstenv mem */
                 gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x0f: /* fnstcw mem */
-                gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
-                tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
+                gen_helper_fnstcw(s->tmp2_i32, cpu_env);
+                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                     s->mem_index, MO_LEUW);
                 break;
             case 0x1d: /* fldt mem */
@@ -5962,8 +5963,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x2f: /* fnstsw mem */
-                gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
-                tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
+                gen_helper_fnstsw(s->tmp2_i32, cpu_env);
+                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                     s->mem_index, MO_LEUW);
                 break;
             case 0x3c: /* fbld */
@@ -6241,8 +6242,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             case 0x3c: /* df/4 */
                 switch(rm) {
                 case 0:
-                    gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
-                    tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
+                    gen_helper_fnstsw(s->tmp2_i32, cpu_env);
+                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
                     gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
                     break;
                 default:
@@ -6394,10 +6395,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
-        tcg_gen_movi_i32(cpu_tmp2_i32, val);
-        gen_helper_in_func(ot, s->T1, cpu_tmp2_i32);
+        tcg_gen_movi_i32(s->tmp2_i32, val);
+        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
         gen_op_mov_reg_v(ot, R_EAX, s->T1);
-        gen_bpt_io(s, cpu_tmp2_i32, ot);
+        gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6415,10 +6416,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
-        tcg_gen_movi_i32(cpu_tmp2_i32, val);
+        tcg_gen_movi_i32(s->tmp2_i32, val);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
-        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
-        gen_bpt_io(s, cpu_tmp2_i32, ot);
+        gen_helper_out_func(ot, s->tmp2_i32, cpu_tmp3_i32);
+        gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6433,10 +6434,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-        gen_helper_in_func(ot, s->T1, cpu_tmp2_i32);
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
         gen_op_mov_reg_v(ot, R_EAX, s->T1);
-        gen_bpt_io(s, cpu_tmp2_i32, ot);
+        gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6453,10 +6454,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
-        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
-        gen_bpt_io(s, cpu_tmp2_i32, ot);
+        gen_helper_out_func(ot, s->tmp2_i32, cpu_tmp3_i32);
+        gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6734,12 +6735,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
         break;
     case 0xfc: /* cld */
-        tcg_gen_movi_i32(cpu_tmp2_i32, 1);
-        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
+        tcg_gen_movi_i32(s->tmp2_i32, 1);
+        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
         break;
     case 0xfd: /* std */
-        tcg_gen_movi_i32(cpu_tmp2_i32, -1);
-        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
+        tcg_gen_movi_i32(s->tmp2_i32, -1);
+        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
         break;
 
         /************************/
@@ -7071,11 +7072,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             goto illegal_op;
         gen_op_mov_v_reg(ot, s->T0, reg);
         gen_lea_modrm(env, s, modrm);
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
         if (ot == MO_16) {
-            gen_helper_boundw(cpu_env, s->A0, cpu_tmp2_i32);
+            gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
         } else {
-            gen_helper_boundl(cpu_env, s->A0, cpu_tmp2_i32);
+            gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
         }
         break;
     case 0x1c8 ... 0x1cf: /* bswap reg */
@@ -7264,8 +7265,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_lldt(cpu_env, cpu_tmp2_i32);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_lldt(cpu_env, s->tmp2_i32);
             }
             break;
         case 1: /* str */
@@ -7285,8 +7286,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
-                gen_helper_ltr(cpu_env, cpu_tmp2_i32);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_ltr(cpu_env, s->tmp2_i32);
             }
             break;
         case 4: /* verr */
@@ -7385,8 +7386,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
                 goto illegal_op;
             }
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
+            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
+            gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, s->tmp2_i32);
             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
             break;
 
@@ -7402,8 +7403,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_xsetbv(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
+            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
+            gen_helper_xsetbv(cpu_env, s->tmp2_i32, cpu_tmp1_i64);
             /* End TB because translation flags may change.  */
             gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
@@ -7562,8 +7563,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             if (prefixes & PREFIX_LOCK) {
                 goto illegal_op;
             }
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_rdpkru(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
+            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
+            gen_helper_rdpkru(cpu_tmp1_i64, cpu_env, s->tmp2_i32);
             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
             break;
         case 0xef: /* wrpkru */
@@ -7572,8 +7573,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_wrpkru(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
+            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
+            gen_helper_wrpkru(cpu_env, s->tmp2_i32, cpu_tmp1_i64);
             break;
         CASE_MODRM_OP(6): /* lmsw */
             if (s->cpl != 0) {
@@ -8042,14 +8043,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             if (b & 2) {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
                 gen_op_mov_v_reg(ot, s->T0, rm);
-                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
-                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, s->T0);
+                tcg_gen_movi_i32(s->tmp2_i32, reg);
+                gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
                 gen_jmp_im(s, s->pc - s->cs_base);
                 gen_eob(s);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
-                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
-                gen_helper_get_dr(s->T0, cpu_env, cpu_tmp2_i32);
+                tcg_gen_movi_i32(s->tmp2_i32, reg);
+                gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
                 gen_op_mov_reg_v(ot, rm, s->T0);
             }
         }
@@ -8116,8 +8117,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 break;
             }
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0, s->mem_index, MO_LEUL);
-            gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
+            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
+            gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
             break;
 
         CASE_MODRM_MEM_OP(3): /* stmxcsr */
@@ -8216,8 +8217,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 TCGv base, treg, src, dst;
 
                 /* Preserve hflags bits by testing CR4 at runtime.  */
-                tcg_gen_movi_i32(cpu_tmp2_i32, CR4_FSGSBASE_MASK);
-                gen_helper_cr4_testbit(cpu_env, cpu_tmp2_i32);
+                tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
+                gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
 
                 base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
                 treg = cpu_regs[(modrm & 7) | REX_B(s)];
@@ -8484,7 +8485,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
 
     dc->tmp0 = tcg_temp_new();
     cpu_tmp1_i64 = tcg_temp_new_i64();
-    cpu_tmp2_i32 = tcg_temp_new_i32();
+    dc->tmp2_i32 = tcg_temp_new_i32();
     cpu_tmp3_i32 = tcg_temp_new_i32();
     dc->tmp4 = tcg_temp_new();
     dc->ptr0 = tcg_temp_new_ptr();
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 10/13] target/i386: move cpu_tmp3_i32 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (8 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 09/13] target/i386: move cpu_tmp2_i32 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:56   ` Richard Henderson
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 11/13] target/i386: move cpu_tmp1_i64 " Emilio G. Cota
                   ` (3 subsequent siblings)
  13 siblings, 1 reply; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 64 ++++++++++++++++++++---------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index ec68f7dba1..cd880cc2a8 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -79,7 +79,6 @@ static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
 
-static TCGv_i32 cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;
 
 #include "exec/gen-icount.h"
@@ -143,6 +142,7 @@ typedef struct DisasContext {
     TCGv_ptr ptr0;
     TCGv_ptr ptr1;
     TCGv_i32 tmp2_i32;
+    TCGv_i32 tmp3_i32;
 
     sigjmp_buf jmpbuf;
 } DisasContext;
@@ -1159,8 +1159,8 @@ static inline void gen_outs(DisasContext *s, TCGMemOp ot)
 
     tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
     tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
-    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T0);
-    gen_helper_out_func(ot, s->tmp2_i32, cpu_tmp3_i32);
+    tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
+    gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
     gen_op_movl_T0_Dshift(s, ot);
     gen_op_add_reg_T0(s, s->aflag, R_ESI);
     gen_bpt_io(s, s->tmp2_i32, ot);
@@ -1426,8 +1426,8 @@ static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
     if (s->cc_op == CC_OP_DYNAMIC) {
         oldop = cpu_cc_op;
     } else {
-        tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
-        oldop = cpu_tmp3_i32;
+        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
+        oldop = s->tmp3_i32;
     }
 
     /* Conditionally store the CC_OP value.  */
@@ -1546,11 +1546,11 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
 #ifdef TARGET_X86_64
     case MO_32:
         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
+        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
         if (is_right) {
-            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
         } else {
-            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
         }
         tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
         break;
@@ -1593,9 +1593,9 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
     t1 = tcg_temp_new_i32();
     tcg_gen_trunc_tl_i32(t1, s->T1);
     tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
-    tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
+    tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
-                        s->tmp2_i32, cpu_tmp3_i32);
+                        s->tmp2_i32, s->tmp3_i32);
     tcg_temp_free_i32(t0);
     tcg_temp_free_i32(t1);
 
@@ -3912,11 +3912,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 switch (ot) {
                 default:
                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-                    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
-                    tcg_gen_mulu2_i32(s->tmp2_i32, cpu_tmp3_i32,
-                                      s->tmp2_i32, cpu_tmp3_i32);
+                    tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
+                    tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
+                                      s->tmp2_i32, s->tmp3_i32);
                     tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
-                    tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
+                    tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
                     break;
 #ifdef TARGET_X86_64
                 case MO_64:
@@ -4882,11 +4882,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             default:
             case MO_32:
                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
-                tcg_gen_mulu2_i32(s->tmp2_i32, cpu_tmp3_i32,
-                                  s->tmp2_i32, cpu_tmp3_i32);
+                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
+                tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
+                                  s->tmp2_i32, s->tmp3_i32);
                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
-                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
                 set_cc_op(s, CC_OP_MULL);
@@ -4933,14 +4933,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             default:
             case MO_32:
                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
-                tcg_gen_muls2_i32(s->tmp2_i32, cpu_tmp3_i32,
-                                  s->tmp2_i32, cpu_tmp3_i32);
+                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
+                tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
+                                  s->tmp2_i32, s->tmp3_i32);
                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
-                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
                 tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
-                tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, cpu_tmp3_i32);
+                tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
                 tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
                 set_cc_op(s, CC_OP_MULL);
                 break;
@@ -5210,13 +5210,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 #endif
         case MO_32:
             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
-            tcg_gen_muls2_i32(s->tmp2_i32, cpu_tmp3_i32,
-                              s->tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
+            tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
+                              s->tmp2_i32, s->tmp3_i32);
             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
             tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
-            tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
             tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
             break;
         default:
@@ -6417,8 +6417,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_io_start();
 	}
         tcg_gen_movi_i32(s->tmp2_i32, val);
-        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
-        gen_helper_out_func(ot, s->tmp2_i32, cpu_tmp3_i32);
+        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
+        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
         gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
@@ -6455,8 +6455,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_io_start();
 	}
         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
-        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
-        gen_helper_out_func(ot, s->tmp2_i32, cpu_tmp3_i32);
+        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
+        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
         gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
@@ -8486,7 +8486,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
     dc->tmp0 = tcg_temp_new();
     cpu_tmp1_i64 = tcg_temp_new_i64();
     dc->tmp2_i32 = tcg_temp_new_i32();
-    cpu_tmp3_i32 = tcg_temp_new_i32();
+    dc->tmp3_i32 = tcg_temp_new_i32();
     dc->tmp4 = tcg_temp_new();
     dc->ptr0 = tcg_temp_new_ptr();
     dc->ptr1 = tcg_temp_new_ptr();
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 11/13] target/i386: move cpu_tmp1_i64 to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (9 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 10/13] target/i386: move cpu_tmp3_i32 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:57   ` Richard Henderson
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 12/13] target/i386: move x86_64_hregs " Emilio G. Cota
                   ` (2 subsequent siblings)
  13 siblings, 1 reply; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 160 ++++++++++++++++++++--------------------
 1 file changed, 80 insertions(+), 80 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index cd880cc2a8..61a98ef872 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -79,8 +79,6 @@ static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
 
-static TCGv_i64 cpu_tmp1_i64;
-
 #include "exec/gen-icount.h"
 
 #ifdef TARGET_X86_64
@@ -143,6 +141,7 @@ typedef struct DisasContext {
     TCGv_ptr ptr1;
     TCGv_i32 tmp2_i32;
     TCGv_i32 tmp3_i32;
+    TCGv_i64 tmp1_i64;
 
     sigjmp_buf jmpbuf;
 } DisasContext;
@@ -2107,12 +2106,12 @@ static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
 {
     TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
 
-    tcg_gen_extu_tl_i64(cpu_tmp1_i64, ea);
+    tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
     if (!CODE64(s)) {
-        tcg_gen_ext32u_i64(cpu_tmp1_i64, cpu_tmp1_i64);
+        tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
     }
-    tcg_gen_setcond_i64(cond, cpu_tmp1_i64, cpu_tmp1_i64, bndv);
-    tcg_gen_extrl_i64_i32(s->tmp2_i32, cpu_tmp1_i64);
+    tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
+    tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
     gen_helper_bndck(cpu_env, s->tmp2_i32);
 }
 
@@ -2641,48 +2640,48 @@ static void gen_jmp(DisasContext *s, target_ulong eip)
 
 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
 {
-    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
+    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
 }
 
 static inline void gen_stq_env_A0(DisasContext *s, int offset)
 {
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
-    tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
+    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
 }
 
 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
 {
     int mem_index = s->mem_index;
-    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, mem_index, MO_LEQ);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
+    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
-    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->tmp0, mem_index, MO_LEQ);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
+    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
 }
 
 static inline void gen_sto_env_A0(DisasContext *s, int offset)
 {
     int mem_index = s->mem_index;
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, mem_index, MO_LEQ);
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
+    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
-    tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->tmp0, mem_index, MO_LEQ);
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
+    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
 }
 
-static inline void gen_op_movo(int d_offset, int s_offset)
+static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
 {
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
 }
 
-static inline void gen_op_movq(int d_offset, int s_offset)
+static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
 {
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
 }
 
 static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
@@ -2691,10 +2690,10 @@ static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
     tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
 }
 
-static inline void gen_op_movq_env_0(int d_offset)
+static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
 {
-    tcg_gen_movi_i64(cpu_tmp1_i64, 0);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
+    tcg_gen_movi_i64(s->tmp1_i64, 0);
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
 }
 
 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
@@ -3178,9 +3177,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
-                tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
+                tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
                                offsetof(CPUX86State,fpregs[rm].mmx));
-                tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
+                tcg_gen_st_i64(s->tmp1_i64, cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx));
             }
             break;
@@ -3195,7 +3194,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
+                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
                             offsetof(CPUX86State,xmm_regs[rm]));
             }
             break;
@@ -3230,7 +3229,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             }
             break;
@@ -3243,7 +3242,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             } else {
                 /* movhlps */
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
             }
             break;
@@ -3270,10 +3269,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                            xmm_regs[reg].ZMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             }
-            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
+            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
                         offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
             break;
         case 0x016: /* movhps */
@@ -3285,7 +3284,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             } else {
                 /* movlhps */
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             }
             break;
@@ -3361,10 +3360,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                            xmm_regs[reg].ZMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             }
-            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
+            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
             break;
         case 0x7f: /* movq ea, mm */
             if (mod != 3) {
@@ -3372,7 +3371,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
-                gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
+                gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
                             offsetof(CPUX86State,fpregs[reg].mmx));
             }
             break;
@@ -3387,7 +3386,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
+                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
                             offsetof(CPUX86State,xmm_regs[reg]));
             }
             break;
@@ -3410,7 +3409,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                            xmm_regs[reg].ZMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
             }
             break;
@@ -3643,22 +3642,23 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                            xmm_regs[reg].ZMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
-                gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
+                gen_op_movq_env_0(s,
+                                  offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
             }
             break;
         case 0x2d6: /* movq2dq */
             gen_helper_enter_mmx(cpu_env);
             rm = (modrm & 7);
-            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
+            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                         offsetof(CPUX86State,fpregs[rm].mmx));
-            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
+            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
             break;
         case 0x3d6: /* movdq2q */
             gen_helper_enter_mmx(cpu_env);
             rm = (modrm & 7) | REX_B(s);
-            gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
+            gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
                         offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             break;
         case 0xd7: /* pmovmskb */
@@ -4174,13 +4174,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         }
                     } else { /* pextrq */
 #ifdef TARGET_X86_64
-                        tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
+                        tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
                                         offsetof(CPUX86State,
                                                 xmm_regs[reg].ZMM_Q(val & 1)));
                         if (mod == 3) {
-                            tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64);
+                            tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
                         } else {
-                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0,
+                            tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
                                                 s->mem_index, MO_LEQ);
                         }
 #else
@@ -4251,12 +4251,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     } else { /* pinsrq */
 #ifdef TARGET_X86_64
                         if (mod == 3) {
-                            gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
+                            gen_op_mov_v_reg(ot, s->tmp1_i64, rm);
                         } else {
-                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
+                            tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
                                                 s->mem_index, MO_LEQ);
                         }
-                        tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
+                        tcg_gen_st_i64(s->tmp1_i64, cpu_env,
                                         offsetof(CPUX86State,
                                                 xmm_regs[reg].ZMM_Q(val & 1)));
 #else
@@ -5831,9 +5831,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                         gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
                         break;
                     case 2:
-                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
+                        tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
-                        gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64);
+                        gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
                         break;
                     case 3:
                     default:
@@ -5870,9 +5870,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                         gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
                         break;
                     case 2:
-                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
+                        tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
-                        gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64);
+                        gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
                         break;
                     case 3:
                     default:
@@ -5891,8 +5891,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                                             s->mem_index, MO_LEUL);
                         break;
                     case 2:
-                        gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env);
-                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0,
+                        gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
+                        tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
                         break;
                     case 3:
@@ -5917,8 +5917,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                                             s->mem_index, MO_LEUL);
                         break;
                     case 2:
-                        gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env);
-                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0,
+                        gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
+                        tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
                         break;
                     case 3:
@@ -5975,12 +5975,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_helper_fpop(cpu_env);
                 break;
             case 0x3d: /* fildll */
-                tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
-                gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64);
+                tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
+                gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
                 break;
             case 0x3f: /* fistpll */
-                gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env);
-                tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
+                gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
+                tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
                 gen_helper_fpop(cpu_env);
                 break;
             default:
@@ -7387,8 +7387,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, s->tmp2_i32);
-            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
+            gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
+            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
             break;
 
         case 0xd1: /* xsetbv */
@@ -7401,10 +7401,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                 break;
             }
-            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_xsetbv(cpu_env, s->tmp2_i32, cpu_tmp1_i64);
+            gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
             /* End TB because translation flags may change.  */
             gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
@@ -7564,17 +7564,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_rdpkru(cpu_tmp1_i64, cpu_env, s->tmp2_i32);
-            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
+            gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
+            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
             break;
         case 0xef: /* wrpkru */
             if (prefixes & PREFIX_LOCK) {
                 goto illegal_op;
             }
-            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_wrpkru(cpu_env, s->tmp2_i32, cpu_tmp1_i64);
+            gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
             break;
         CASE_MODRM_OP(6): /* lmsw */
             if (s->cpl != 0) {
@@ -8141,9 +8141,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
-            gen_helper_xsave(cpu_env, s->A0, cpu_tmp1_i64);
+            gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
             break;
 
         CASE_MODRM_MEM_OP(5): /* xrstor */
@@ -8153,9 +8153,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
-            gen_helper_xrstor(cpu_env, s->A0, cpu_tmp1_i64);
+            gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
             /* XRSTOR is how MPX is enabled, which changes how
                we translate.  Thus we need to end the TB.  */
             gen_update_cc_op(s);
@@ -8181,9 +8181,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     goto illegal_op;
                 }
                 gen_lea_modrm(env, s, modrm);
-                tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+                tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
                                       cpu_regs[R_EDX]);
-                gen_helper_xsaveopt(cpu_env, s->A0, cpu_tmp1_i64);
+                gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
             }
             break;
 
@@ -8484,7 +8484,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
     dc->A0 = tcg_temp_new();
 
     dc->tmp0 = tcg_temp_new();
-    cpu_tmp1_i64 = tcg_temp_new_i64();
+    dc->tmp1_i64 = tcg_temp_new_i64();
     dc->tmp2_i32 = tcg_temp_new_i32();
     dc->tmp3_i32 = tcg_temp_new_i32();
     dc->tmp4 = tcg_temp_new();
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 12/13] target/i386: move x86_64_hregs to DisasContext
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (10 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 11/13] target/i386: move cpu_tmp1_i64 " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-11 20:58   ` Richard Henderson
  2018-09-13 14:31   ` Alex Bennée
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 13/13] configure: enable mttcg for i386 and x86_64 Emilio G. Cota
  2018-09-12 12:46 ` [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Paolo Bonzini
  13 siblings, 2 replies; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Move x86_64_hregs into DisasContext. While at it, convert it to a bool
so that it fills an existing hole in the struct.

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 307 ++++++++++++++++++++--------------------
 1 file changed, 154 insertions(+), 153 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 61a98ef872..b8222dc4ba 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -81,10 +81,6 @@ static TCGv_i64 cpu_bndu[4];
 
 #include "exec/gen-icount.h"
 
-#ifdef TARGET_X86_64
-static int x86_64_hregs;
-#endif
-
 typedef struct DisasContext {
     DisasContextBase base;
 
@@ -109,6 +105,9 @@ typedef struct DisasContext {
     int ss32;   /* 32 bit stack segment */
     CCOp cc_op;  /* current CC operation */
     bool cc_op_dirty;
+#ifdef TARGET_X86_64
+    bool x86_64_hregs;
+#endif
     int addseg; /* non zero if either DS/ES/SS have a non zero base */
     int f_st;   /* currently unused */
     int vm86;   /* vm86 mode */
@@ -307,13 +306,13 @@ static void gen_update_cc_op(DisasContext *s)
  * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
  * true for this special case, false otherwise.
  */
-static inline bool byte_reg_is_xH(int reg)
+static inline bool byte_reg_is_xH(DisasContext *s, int reg)
 {
     if (reg < 4) {
         return false;
     }
 #ifdef TARGET_X86_64
-    if (reg >= 8 || x86_64_hregs) {
+    if (reg >= 8 || s->x86_64_hregs) {
         return false;
     }
 #endif
@@ -360,11 +359,11 @@ static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
     return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
 }
 
-static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
+static void gen_op_mov_reg_v(DisasContext *s, TCGMemOp ot, int reg, TCGv t0)
 {
     switch(ot) {
     case MO_8:
-        if (!byte_reg_is_xH(reg)) {
+        if (!byte_reg_is_xH(s, reg)) {
             tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
         } else {
             tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
@@ -388,9 +387,10 @@ static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
     }
 }
 
-static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
+static inline
+void gen_op_mov_v_reg(DisasContext *s, TCGMemOp ot, TCGv t0, int reg)
 {
-    if (ot == MO_8 && byte_reg_is_xH(reg)) {
+    if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
         tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
     } else {
         tcg_gen_mov_tl(t0, cpu_regs[reg]);
@@ -414,13 +414,13 @@ static inline
 void gen_op_add_reg_im(DisasContext *s, TCGMemOp size, int reg, int32_t val)
 {
     tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
-    gen_op_mov_reg_v(size, reg, s->tmp0);
+    gen_op_mov_reg_v(s, size, reg, s->tmp0);
 }
 
 static inline void gen_op_add_reg_T0(DisasContext *s, TCGMemOp size, int reg)
 {
     tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
-    gen_op_mov_reg_v(size, reg, s->tmp0);
+    gen_op_mov_reg_v(s, size, reg, s->tmp0);
 }
 
 static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
@@ -438,7 +438,7 @@ static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
     if (d == OR_TMP0) {
         gen_op_st_v(s, idx, s->T0, s->A0);
     } else {
-        gen_op_mov_reg_v(idx, d, s->T0);
+        gen_op_mov_reg_v(s, idx, d, s->T0);
     }
 }
 
@@ -1077,7 +1077,7 @@ static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
 
 static inline void gen_stos(DisasContext *s, TCGMemOp ot)
 {
-    gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
+    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
     gen_string_movl_A0_EDI(s);
     gen_op_st_v(s, ot, s->T0, s->A0);
     gen_op_movl_T0_Dshift(s, ot);
@@ -1088,7 +1088,7 @@ static inline void gen_lods(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_ESI(s);
     gen_op_ld_v(s, ot, s->T0, s->A0);
-    gen_op_mov_reg_v(ot, R_EAX, s->T0);
+    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
     gen_op_movl_T0_Dshift(s, ot);
     gen_op_add_reg_T0(s, s->aflag, R_ESI);
 }
@@ -1272,7 +1272,7 @@ static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
 static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
 {
     if (d != OR_TMP0) {
-        gen_op_mov_v_reg(ot, s1->T0, d);
+        gen_op_mov_v_reg(s1, ot, s1->T0, d);
     } else if (!(s1->prefix & PREFIX_LOCK)) {
         gen_op_ld_v(s1, ot, s1->T0, s1->A0);
     }
@@ -1383,7 +1383,7 @@ static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
                                     s1->mem_index, ot | MO_LE);
     } else {
         if (d != OR_TMP0) {
-            gen_op_mov_v_reg(ot, s1->T0, d);
+            gen_op_mov_v_reg(s1, ot, s1->T0, d);
         } else {
             gen_op_ld_v(s1, ot, s1->T0, s1->A0);
         }
@@ -1450,7 +1450,7 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
     if (op1 == OR_TMP0) {
         gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, s->T0, op1);
+        gen_op_mov_v_reg(s, ot, s->T0, op1);
     }
 
     tcg_gen_andi_tl(s->T1, s->T1, mask);
@@ -1486,7 +1486,7 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
     if (op1 == OR_TMP0)
         gen_op_ld_v(s, ot, s->T0, s->A0);
     else
-        gen_op_mov_v_reg(ot, s->T0, op1);
+        gen_op_mov_v_reg(s, ot, s->T0, op1);
 
     op2 &= mask;
     if (op2 != 0) {
@@ -1526,7 +1526,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
     if (op1 == OR_TMP0) {
         gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, s->T0, op1);
+        gen_op_mov_v_reg(s, ot, s->T0, op1);
     }
 
     tcg_gen_andi_tl(s->T1, s->T1, mask);
@@ -1612,7 +1612,7 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
     if (op1 == OR_TMP0) {
         gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, s->T0, op1);
+        gen_op_mov_v_reg(s, ot, s->T0, op1);
     }
 
     op2 &= mask;
@@ -1690,7 +1690,7 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
     if (op1 == OR_TMP0)
         gen_op_ld_v(s, ot, s->T0, s->A0);
     else
-        gen_op_mov_v_reg(ot, s->T0, op1);
+        gen_op_mov_v_reg(s, ot, s->T0, op1);
     
     if (is_right) {
         switch (ot) {
@@ -1746,7 +1746,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
     if (op1 == OR_TMP0) {
         gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, s->T0, op1);
+        gen_op_mov_v_reg(s, ot, s->T0, op1);
     }
 
     count = tcg_temp_new();
@@ -1820,7 +1820,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
 {
     if (s != OR_TMP1)
-        gen_op_mov_v_reg(ot, s1->T1, s);
+        gen_op_mov_v_reg(s1, ot, s1->T1, s);
     switch(op) {
     case OP_ROL:
         gen_rot_rm_T1(s1, ot, d, 0);
@@ -2133,23 +2133,23 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
     if (mod == 3) {
         if (is_store) {
             if (reg != OR_TMP0)
-                gen_op_mov_v_reg(ot, s->T0, reg);
-            gen_op_mov_reg_v(ot, rm, s->T0);
+                gen_op_mov_v_reg(s, ot, s->T0, reg);
+            gen_op_mov_reg_v(s, ot, rm, s->T0);
         } else {
-            gen_op_mov_v_reg(ot, s->T0, rm);
+            gen_op_mov_v_reg(s, ot, s->T0, rm);
             if (reg != OR_TMP0)
-                gen_op_mov_reg_v(ot, reg, s->T0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
         }
     } else {
         gen_lea_modrm(env, s, modrm);
         if (is_store) {
             if (reg != OR_TMP0)
-                gen_op_mov_v_reg(ot, s->T0, reg);
+                gen_op_mov_v_reg(s, ot, s->T0, reg);
             gen_op_st_v(s, ot, s->T0, s->A0);
         } else {
             gen_op_ld_v(s, ot, s->T0, s->A0);
             if (reg != OR_TMP0)
-                gen_op_mov_reg_v(ot, reg, s->T0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
         }
     }
 }
@@ -2260,7 +2260,7 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
 
     tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
                        s->T0, cpu_regs[reg]);
-    gen_op_mov_reg_v(ot, reg, s->T0);
+    gen_op_mov_reg_v(s, ot, reg, s->T0);
 
     if (cc.mask != -1) {
         tcg_temp_free(cc.reg);
@@ -2354,7 +2354,7 @@ static void gen_push_v(DisasContext *s, TCGv val)
     }
 
     gen_op_st_v(s, d_ot, val, s->A0);
-    gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
+    gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
 }
 
 /* two step pop is necessary for precise exceptions */
@@ -2409,7 +2409,7 @@ static void gen_popa(DisasContext *s)
         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
         gen_op_ld_v(s, d_ot, s->T0, s->A0);
-        gen_op_mov_reg_v(d_ot, 7 - i, s->T0);
+        gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
     }
 
     gen_stack_update(s, 8 * size);
@@ -2448,11 +2448,11 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
     }
 
     /* Copy the FrameTemp value to EBP.  */
-    gen_op_mov_reg_v(a_ot, R_EBP, s->T1);
+    gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
 
     /* Compute the final value of ESP.  */
     tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
-    gen_op_mov_reg_v(a_ot, R_ESP, s->T1);
+    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
 }
 
 static void gen_leave(DisasContext *s)
@@ -2465,8 +2465,8 @@ static void gen_leave(DisasContext *s)
 
     tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
 
-    gen_op_mov_reg_v(d_ot, R_EBP, s->T0);
-    gen_op_mov_reg_v(a_ot, R_ESP, s->T1);
+    gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
+    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
 }
 
 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
@@ -3598,7 +3598,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 goto illegal_op;
 #endif
             }
-            gen_op_mov_reg_v(ot, reg, s->T0);
+            gen_op_mov_reg_v(s, ot, reg, s->T0);
             break;
         case 0xc4: /* pinsrw */
         case 0x1c4:
@@ -3633,7 +3633,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
             }
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_op_mov_reg_v(ot, reg, s->T0);
+            gen_op_mov_reg_v(s, ot, reg, s->T0);
             break;
         case 0x1d6: /* movq ea, xmm */
             if (mod != 3) {
@@ -3787,7 +3787,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                  s->T0, tcg_const_i32(8 << ot));
 
                 ot = mo_64_32(s->dflag);
-                gen_op_mov_reg_v(ot, reg, s->T0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
                 break;
 
             case 0x1f0: /* crc32 or movbe */
@@ -3814,7 +3814,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 if ((b & 1) == 0) {
                     tcg_gen_qemu_ld_tl(s->T0, s->A0,
                                        s->mem_index, ot | MO_BE);
-                    gen_op_mov_reg_v(ot, reg, s->T0);
+                    gen_op_mov_reg_v(s, ot, reg, s->T0);
                 } else {
                     tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
                                        s->mem_index, ot | MO_BE);
@@ -3830,7 +3830,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
-                gen_op_mov_reg_v(ot, reg, s->T0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
                 gen_op_update1_cc(s);
                 set_cc_op(s, CC_OP_LOGICB + ot);
                 break;
@@ -3868,7 +3868,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_subi_tl(s->T1, s->T1, 1);
                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
 
-                    gen_op_mov_reg_v(ot, reg, s->T0);
+                    gen_op_mov_reg_v(s, ot, reg, s->T0);
                     gen_op_update1_cc(s);
                     set_cc_op(s, CC_OP_LOGICB + ot);
                 }
@@ -3896,7 +3896,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 tcg_gen_movi_tl(s->A0, -1);
                 tcg_gen_shl_tl(s->A0, s->A0, s->T1);
                 tcg_gen_andc_tl(s->T0, s->T0, s->A0);
-                gen_op_mov_reg_v(ot, reg, s->T0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
                 gen_op_update1_cc(s);
                 set_cc_op(s, CC_OP_BMILGB + ot);
                 break;
@@ -4071,7 +4071,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     }
                     tcg_gen_shr_tl(s->T0, s->T0, s->T1);
                 }
-                gen_op_mov_reg_v(ot, reg, s->T0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
                 break;
 
             case 0x0f3:
@@ -4104,7 +4104,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto unknown_op;
                 }
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-                gen_op_mov_reg_v(ot, s->vex_v, s->T0);
+                gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
                 set_cc_op(s, CC_OP_BMILGB + ot);
                 break;
 
@@ -4145,7 +4145,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].ZMM_B(val & 15)));
                     if (mod == 3) {
-                        gen_op_mov_reg_v(ot, rm, s->T0);
+                        gen_op_mov_reg_v(s, ot, rm, s->T0);
                     } else {
                         tcg_gen_qemu_st_tl(s->T0, s->A0,
                                            s->mem_index, MO_UB);
@@ -4155,7 +4155,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].ZMM_W(val & 7)));
                     if (mod == 3) {
-                        gen_op_mov_reg_v(ot, rm, s->T0);
+                        gen_op_mov_reg_v(s, ot, rm, s->T0);
                     } else {
                         tcg_gen_qemu_st_tl(s->T0, s->A0,
                                            s->mem_index, MO_LEUW);
@@ -4192,7 +4192,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].ZMM_L(val & 3)));
                     if (mod == 3) {
-                        gen_op_mov_reg_v(ot, rm, s->T0);
+                        gen_op_mov_reg_v(s, ot, rm, s->T0);
                     } else {
                         tcg_gen_qemu_st_tl(s->T0, s->A0,
                                            s->mem_index, MO_LEUL);
@@ -4200,7 +4200,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     break;
                 case 0x20: /* pinsrb */
                     if (mod == 3) {
-                        gen_op_mov_v_reg(MO_32, s->T0, rm);
+                        gen_op_mov_v_reg(s, MO_32, s->T0, rm);
                     } else {
                         tcg_gen_qemu_ld_tl(s->T0, s->A0,
                                            s->mem_index, MO_UB);
@@ -4251,7 +4251,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     } else { /* pinsrq */
 #ifdef TARGET_X86_64
                         if (mod == 3) {
-                            gen_op_mov_v_reg(ot, s->tmp1_i64, rm);
+                            gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
                         } else {
                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
                                                 s->mem_index, MO_LEQ);
@@ -4326,7 +4326,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
                 }
-                gen_op_mov_reg_v(ot, reg, s->T0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
                 break;
 
             default:
@@ -4489,7 +4489,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 #ifdef TARGET_X86_64
     s->rex_x = 0;
     s->rex_b = 0;
-    x86_64_hregs = 0;
+    s->x86_64_hregs = false;
 #endif
     s->rip_offset = 0; /* for relative ip address */
     s->vex_l = 0;
@@ -4548,7 +4548,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             rex_r = (b & 0x4) << 1;
             s->rex_x = (b & 0x2) << 2;
             REX_B(s) = (b & 0x1) << 3;
-            x86_64_hregs = 1; /* select uniform byte register addressing */
+            /* select uniform byte register addressing */
+            s->x86_64_hregs = true;
             goto next_byte;
         }
         break;
@@ -4576,7 +4577,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 goto illegal_op;
             }
 #ifdef TARGET_X86_64
-            if (x86_64_hregs) {
+            if (s->x86_64_hregs) {
                 goto illegal_op;
             }
 #endif
@@ -4681,12 +4682,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     /* xor reg, reg optimisation */
                     set_cc_op(s, CC_OP_CLR);
                     tcg_gen_movi_tl(s->T0, 0);
-                    gen_op_mov_reg_v(ot, reg, s->T0);
+                    gen_op_mov_reg_v(s, ot, reg, s->T0);
                     break;
                 } else {
                     opreg = rm;
                 }
-                gen_op_mov_v_reg(ot, s->T1, reg);
+                gen_op_mov_v_reg(s, ot, s->T1, reg);
                 gen_op(s, op, ot, opreg);
                 break;
             case 1: /* OP Gv, Ev */
@@ -4700,7 +4701,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 } else if (op == OP_XORL && rm == reg) {
                     goto xor_zero;
                 } else {
-                    gen_op_mov_v_reg(ot, s->T1, rm);
+                    gen_op_mov_v_reg(s, ot, s->T1, rm);
                 }
                 gen_op(s, op, ot, reg);
                 break;
@@ -4786,7 +4787,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_op_ld_v(s, ot, s->T0, s->A0);
             }
         } else {
-            gen_op_mov_v_reg(ot, s->T0, rm);
+            gen_op_mov_v_reg(s, ot, s->T0, rm);
         }
 
         switch(op) {
@@ -4809,7 +4810,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 if (mod != 3) {
                     gen_op_st_v(s, ot, s->T0, s->A0);
                 } else {
-                    gen_op_mov_reg_v(ot, rm, s->T0);
+                    gen_op_mov_reg_v(s, ot, rm, s->T0);
                 }
             }
             break;
@@ -4847,7 +4848,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 if (mod != 3) {
                     gen_op_st_v(s, ot, s->T0, s->A0);
                 } else {
-                    gen_op_mov_reg_v(ot, rm, s->T0);
+                    gen_op_mov_reg_v(s, ot, rm, s->T0);
                 }
             }
             gen_op_update_neg_cc(s);
@@ -4856,26 +4857,26 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         case 4: /* mul */
             switch(ot) {
             case MO_8:
-                gen_op_mov_v_reg(MO_8, s->T1, R_EAX);
+                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
                 tcg_gen_ext8u_tl(s->T0, s->T0);
                 tcg_gen_ext8u_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                 tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
                 set_cc_op(s, CC_OP_MULB);
                 break;
             case MO_16:
-                gen_op_mov_v_reg(MO_16, s->T1, R_EAX);
+                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
                 tcg_gen_ext16u_tl(s->T0, s->T0);
                 tcg_gen_ext16u_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                 tcg_gen_shri_tl(s->T0, s->T0, 16);
-                gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
+                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
                 set_cc_op(s, CC_OP_MULW);
                 break;
@@ -4905,29 +4906,29 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         case 5: /* imul */
             switch(ot) {
             case MO_8:
-                gen_op_mov_v_reg(MO_8, s->T1, R_EAX);
+                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
                 tcg_gen_ext8s_tl(s->T0, s->T0);
                 tcg_gen_ext8s_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                 tcg_gen_ext8s_tl(s->tmp0, s->T0);
                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
                 set_cc_op(s, CC_OP_MULB);
                 break;
             case MO_16:
-                gen_op_mov_v_reg(MO_16, s->T1, R_EAX);
+                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
                 tcg_gen_ext16s_tl(s->T0, s->T0);
                 tcg_gen_ext16s_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                 tcg_gen_ext16s_tl(s->tmp0, s->T0);
                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
                 tcg_gen_shri_tl(s->T0, s->T0, 16);
-                gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
+                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
                 set_cc_op(s, CC_OP_MULW);
                 break;
             default:
@@ -5026,7 +5027,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             if (op >= 2 && op != 3 && op != 5)
                 gen_op_ld_v(s, ot, s->T0, s->A0);
         } else {
-            gen_op_mov_v_reg(ot, s->T0, rm);
+            gen_op_mov_v_reg(s, ot, s->T0, rm);
         }
 
         switch(op) {
@@ -5115,7 +5116,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         reg = ((modrm >> 3) & 7) | rex_r;
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_op_mov_v_reg(ot, s->T1, reg);
+        gen_op_mov_v_reg(s, ot, s->T1, reg);
         gen_op_testl_T0_T1_cc(s);
         set_cc_op(s, CC_OP_LOGICB + ot);
         break;
@@ -5125,7 +5126,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         ot = mo_b_d(b, dflag);
         val = insn_get(env, s, ot);
 
-        gen_op_mov_v_reg(ot, s->T0, OR_EAX);
+        gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
         tcg_gen_movi_tl(s->T1, val);
         gen_op_testl_T0_T1_cc(s);
         set_cc_op(s, CC_OP_LOGICB + ot);
@@ -5135,20 +5136,20 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         switch (dflag) {
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
+            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
             tcg_gen_ext32s_tl(s->T0, s->T0);
-            gen_op_mov_reg_v(MO_64, R_EAX, s->T0);
+            gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
             break;
 #endif
         case MO_32:
-            gen_op_mov_v_reg(MO_16, s->T0, R_EAX);
+            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
             tcg_gen_ext16s_tl(s->T0, s->T0);
-            gen_op_mov_reg_v(MO_32, R_EAX, s->T0);
+            gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
             break;
         case MO_16:
-            gen_op_mov_v_reg(MO_8, s->T0, R_EAX);
+            gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
             tcg_gen_ext8s_tl(s->T0, s->T0);
-            gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+            gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
             break;
         default:
             tcg_abort();
@@ -5158,22 +5159,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         switch (dflag) {
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_op_mov_v_reg(MO_64, s->T0, R_EAX);
+            gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
             tcg_gen_sari_tl(s->T0, s->T0, 63);
-            gen_op_mov_reg_v(MO_64, R_EDX, s->T0);
+            gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
             break;
 #endif
         case MO_32:
-            gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
+            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
             tcg_gen_ext32s_tl(s->T0, s->T0);
             tcg_gen_sari_tl(s->T0, s->T0, 31);
-            gen_op_mov_reg_v(MO_32, R_EDX, s->T0);
+            gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
             break;
         case MO_16:
-            gen_op_mov_v_reg(MO_16, s->T0, R_EAX);
+            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
             tcg_gen_ext16s_tl(s->T0, s->T0);
             tcg_gen_sari_tl(s->T0, s->T0, 15);
-            gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
+            gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
             break;
         default:
             tcg_abort();
@@ -5197,7 +5198,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             val = (int8_t)insn_get(env, s, MO_8);
             tcg_gen_movi_tl(s->T1, val);
         } else {
-            gen_op_mov_v_reg(ot, s->T1, reg);
+            gen_op_mov_v_reg(s, ot, s->T1, reg);
         }
         switch (ot) {
 #ifdef TARGET_X86_64
@@ -5227,7 +5228,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
             tcg_gen_ext16s_tl(s->tmp0, s->T0);
             tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
-            gen_op_mov_reg_v(ot, reg, s->T0);
+            gen_op_mov_reg_v(s, ot, reg, s->T0);
             break;
         }
         set_cc_op(s, CC_OP_MULB + ot);
@@ -5238,13 +5239,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         modrm = x86_ldub_code(env, s);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
-        gen_op_mov_v_reg(ot, s->T0, reg);
+        gen_op_mov_v_reg(s, ot, s->T0, reg);
         if (mod == 3) {
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_v_reg(ot, s->T1, rm);
+            gen_op_mov_v_reg(s, ot, s->T1, rm);
             tcg_gen_add_tl(s->T0, s->T0, s->T1);
-            gen_op_mov_reg_v(ot, reg, s->T1);
-            gen_op_mov_reg_v(ot, rm, s->T0);
+            gen_op_mov_reg_v(s, ot, reg, s->T1);
+            gen_op_mov_reg_v(s, ot, rm, s->T0);
         } else {
             gen_lea_modrm(env, s, modrm);
             if (s->prefix & PREFIX_LOCK) {
@@ -5256,7 +5257,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
                 gen_op_st_v(s, ot, s->T0, s->A0);
             }
-            gen_op_mov_reg_v(ot, reg, s->T1);
+            gen_op_mov_reg_v(s, ot, reg, s->T1);
         }
         gen_op_update2_cc(s);
         set_cc_op(s, CC_OP_ADDB + ot);
@@ -5273,7 +5274,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             oldv = tcg_temp_new();
             newv = tcg_temp_new();
             cmpv = tcg_temp_new();
-            gen_op_mov_v_reg(ot, newv, reg);
+            gen_op_mov_v_reg(s, ot, newv, reg);
             tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
 
             if (s->prefix & PREFIX_LOCK) {
@@ -5283,11 +5284,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_lea_modrm(env, s, modrm);
                 tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
                                           s->mem_index, ot | MO_LE);
-                gen_op_mov_reg_v(ot, R_EAX, oldv);
+                gen_op_mov_reg_v(s, ot, R_EAX, oldv);
             } else {
                 if (mod == 3) {
                     rm = (modrm & 7) | REX_B(s);
-                    gen_op_mov_v_reg(ot, oldv, rm);
+                    gen_op_mov_v_reg(s, ot, oldv, rm);
                 } else {
                     gen_lea_modrm(env, s, modrm);
                     gen_op_ld_v(s, ot, oldv, s->A0);
@@ -5298,15 +5299,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 /* store value = (old == cmp ? new : old);  */
                 tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
                 if (mod == 3) {
-                    gen_op_mov_reg_v(ot, R_EAX, oldv);
-                    gen_op_mov_reg_v(ot, rm, newv);
+                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
+                    gen_op_mov_reg_v(s, ot, rm, newv);
                 } else {
                     /* Perform an unconditional store cycle like physical cpu;
                        must be before changing accumulator to ensure
                        idempotency if the store faults and the instruction
                        is restarted */
                     gen_op_st_v(s, ot, newv, s->A0);
-                    gen_op_mov_reg_v(ot, R_EAX, oldv);
+                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
                 }
             }
             tcg_gen_mov_tl(cpu_cc_src, oldv);
@@ -5351,14 +5352,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         /**************************/
         /* push/pop */
     case 0x50 ... 0x57: /* push */
-        gen_op_mov_v_reg(MO_32, s->T0, (b & 7) | REX_B(s));
+        gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
         gen_push_v(s, s->T0);
         break;
     case 0x58 ... 0x5f: /* pop */
         ot = gen_pop_T0(s);
         /* NOTE: order is important for pop %sp */
         gen_pop_update(s, ot);
-        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), s->T0);
+        gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
         break;
     case 0x60: /* pusha */
         if (CODE64(s))
@@ -5388,7 +5389,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             /* NOTE: order is important for pop %sp */
             gen_pop_update(s, ot);
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_reg_v(ot, rm, s->T0);
+            gen_op_mov_reg_v(s, ot, rm, s->T0);
         } else {
             /* NOTE: order is important too for MMU exceptions */
             s->popl_esp_hack = 1 << ot;
@@ -5478,7 +5479,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (mod != 3) {
             gen_op_st_v(s, ot, s->T0, s->A0);
         } else {
-            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), s->T0);
+            gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
         }
         break;
     case 0x8a:
@@ -5488,7 +5489,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         reg = ((modrm >> 3) & 7) | rex_r;
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_op_mov_reg_v(ot, reg, s->T0);
+        gen_op_mov_reg_v(s, ot, reg, s->T0);
         break;
     case 0x8e: /* mov seg, Gv */
         modrm = x86_ldub_code(env, s);
@@ -5540,10 +5541,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             rm = (modrm & 7) | REX_B(s);
 
             if (mod == 3) {
-                if (s_ot == MO_SB && byte_reg_is_xH(rm)) {
+                if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
                     tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
                 } else {
-                    gen_op_mov_v_reg(ot, s->T0, rm);
+                    gen_op_mov_v_reg(s, ot, s->T0, rm);
                     switch (s_ot) {
                     case MO_UB:
                         tcg_gen_ext8u_tl(s->T0, s->T0);
@@ -5560,11 +5561,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                         break;
                     }
                 }
-                gen_op_mov_reg_v(d_ot, reg, s->T0);
+                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
             } else {
                 gen_lea_modrm(env, s, modrm);
                 gen_op_ld_v(s, s_ot, s->T0, s->A0);
-                gen_op_mov_reg_v(d_ot, reg, s->T0);
+                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
             }
         }
         break;
@@ -5579,7 +5580,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             AddressParts a = gen_lea_modrm_0(env, s, modrm);
             TCGv ea = gen_lea_modrm_1(s, a);
             gen_lea_v_seg(s, s->aflag, ea, -1, -1);
-            gen_op_mov_reg_v(dflag, reg, s->A0);
+            gen_op_mov_reg_v(s, dflag, reg, s->A0);
         }
         break;
 
@@ -5605,9 +5606,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             gen_add_A0_ds_seg(s);
             if ((b & 2) == 0) {
                 gen_op_ld_v(s, ot, s->T0, s->A0);
-                gen_op_mov_reg_v(ot, R_EAX, s->T0);
+                gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
             } else {
-                gen_op_mov_v_reg(ot, s->T0, R_EAX);
+                gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
                 gen_op_st_v(s, ot, s->T0, s->A0);
             }
         }
@@ -5619,12 +5620,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_extu(s->aflag, s->A0);
         gen_add_A0_ds_seg(s);
         gen_op_ld_v(s, MO_8, s->T0, s->A0);
-        gen_op_mov_reg_v(MO_8, R_EAX, s->T0);
+        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
         break;
     case 0xb0 ... 0xb7: /* mov R, Ib */
         val = insn_get(env, s, MO_8);
         tcg_gen_movi_tl(s->T0, val);
-        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), s->T0);
+        gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
         break;
     case 0xb8 ... 0xbf: /* mov R, Iv */
 #ifdef TARGET_X86_64
@@ -5634,7 +5635,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             tmp = x86_ldq_code(env, s);
             reg = (b & 7) | REX_B(s);
             tcg_gen_movi_tl(s->T0, tmp);
-            gen_op_mov_reg_v(MO_64, reg, s->T0);
+            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
         } else
 #endif
         {
@@ -5642,7 +5643,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             val = insn_get(env, s, ot);
             reg = (b & 7) | REX_B(s);
             tcg_gen_movi_tl(s->T0, val);
-            gen_op_mov_reg_v(ot, reg, s->T0);
+            gen_op_mov_reg_v(s, ot, reg, s->T0);
         }
         break;
 
@@ -5661,17 +5662,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         if (mod == 3) {
             rm = (modrm & 7) | REX_B(s);
         do_xchg_reg:
-            gen_op_mov_v_reg(ot, s->T0, reg);
-            gen_op_mov_v_reg(ot, s->T1, rm);
-            gen_op_mov_reg_v(ot, rm, s->T0);
-            gen_op_mov_reg_v(ot, reg, s->T1);
+            gen_op_mov_v_reg(s, ot, s->T0, reg);
+            gen_op_mov_v_reg(s, ot, s->T1, rm);
+            gen_op_mov_reg_v(s, ot, rm, s->T0);
+            gen_op_mov_reg_v(s, ot, reg, s->T1);
         } else {
             gen_lea_modrm(env, s, modrm);
-            gen_op_mov_v_reg(ot, s->T0, reg);
+            gen_op_mov_v_reg(s, ot, s->T0, reg);
             /* for xchg, lock is implicit */
             tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
                                    s->mem_index, ot | MO_LE);
-            gen_op_mov_reg_v(ot, reg, s->T1);
+            gen_op_mov_reg_v(s, ot, reg, s->T1);
         }
         break;
     case 0xc4: /* les Gv */
@@ -5704,7 +5705,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_op_ld_v(s, MO_16, s->T0, s->A0);
         gen_movl_seg_T0(s, op);
         /* then put the data */
-        gen_op_mov_reg_v(ot, reg, s->T1);
+        gen_op_mov_reg_v(s, ot, reg, s->T1);
         if (s->base.is_jmp) {
             gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
@@ -5783,7 +5784,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         } else {
             opreg = rm;
         }
-        gen_op_mov_v_reg(ot, s->T1, reg);
+        gen_op_mov_v_reg(s, ot, s->T1, reg);
 
         if (shift) {
             TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
@@ -6244,7 +6245,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 case 0:
                     gen_helper_fnstsw(s->tmp2_i32, cpu_env);
                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
-                    gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
+                    gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
                     break;
                 default:
                     goto unknown_op;
@@ -6397,7 +6398,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 	}
         tcg_gen_movi_i32(s->tmp2_i32, val);
         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
-        gen_op_mov_reg_v(ot, R_EAX, s->T1);
+        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
         gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
@@ -6411,7 +6412,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         tcg_gen_movi_tl(s->T0, val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
-        gen_op_mov_v_reg(ot, s->T1, R_EAX);
+        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
 
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
@@ -6436,7 +6437,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 	}
         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
-        gen_op_mov_reg_v(ot, R_EAX, s->T1);
+        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
         gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
@@ -6449,7 +6450,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
-        gen_op_mov_v_reg(ot, s->T1, R_EAX);
+        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
 
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
@@ -6708,7 +6709,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x9e: /* sahf */
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
             goto illegal_op;
-        gen_op_mov_v_reg(MO_8, s->T0, R_AH);
+        gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
         gen_compute_eflags(s);
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
         tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
@@ -6720,7 +6721,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_compute_eflags(s);
         /* Note: gen_compute_eflags() only gives the condition codes */
         tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
-        gen_op_mov_reg_v(MO_8, R_AH, s->T0);
+        gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
         break;
     case 0xf5: /* cmc */
         gen_compute_eflags(s);
@@ -6758,7 +6759,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_op_ld_v(s, ot, s->T0, s->A0);
             }
         } else {
-            gen_op_mov_v_reg(ot, s->T0, rm);
+            gen_op_mov_v_reg(s, ot, s->T0, rm);
         }
         /* load shift */
         val = x86_ldub_code(env, s);
@@ -6784,7 +6785,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
-        gen_op_mov_v_reg(MO_32, s->T1, reg);
+        gen_op_mov_v_reg(s, MO_32, s->T1, reg);
         if (mod != 3) {
             AddressParts a = gen_lea_modrm_0(env, s, modrm);
             /* specific case: we need to add a displacement */
@@ -6797,7 +6798,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_op_ld_v(s, ot, s->T0, s->A0);
             }
         } else {
-            gen_op_mov_v_reg(ot, s->T0, rm);
+            gen_op_mov_v_reg(s, ot, s->T0, rm);
         }
     bt_op:
         tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
@@ -6847,7 +6848,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 if (mod != 3) {
                     gen_op_st_v(s, ot, s->T0, s->A0);
                 } else {
-                    gen_op_mov_reg_v(ot, rm, s->T0);
+                    gen_op_mov_reg_v(s, ot, rm, s->T0);
                 }
             }
         }
@@ -6930,7 +6931,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
             }
         }
-        gen_op_mov_reg_v(ot, reg, s->T0);
+        gen_op_mov_reg_v(s, ot, reg, s->T0);
         break;
         /************************/
         /* bcd */
@@ -7070,7 +7071,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_op_mov_v_reg(ot, s->T0, reg);
+        gen_op_mov_v_reg(s, ot, s->T0, reg);
         gen_lea_modrm(env, s, modrm);
         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
         if (ot == MO_16) {
@@ -7083,16 +7084,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         reg = (b & 7) | REX_B(s);
 #ifdef TARGET_X86_64
         if (dflag == MO_64) {
-            gen_op_mov_v_reg(MO_64, s->T0, reg);
+            gen_op_mov_v_reg(s, MO_64, s->T0, reg);
             tcg_gen_bswap64_i64(s->T0, s->T0);
-            gen_op_mov_reg_v(MO_64, reg, s->T0);
+            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
         } else
 #endif
         {
-            gen_op_mov_v_reg(MO_32, s->T0, reg);
+            gen_op_mov_v_reg(s, MO_32, s->T0, reg);
             tcg_gen_ext32u_tl(s->T0, s->T0);
             tcg_gen_bswap32_tl(s->T0, s->T0);
-            gen_op_mov_reg_v(MO_32, reg, s->T0);
+            gen_op_mov_reg_v(s, MO_32, reg, s->T0);
         }
         break;
     case 0xd6: /* salc */
@@ -7100,7 +7101,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             goto illegal_op;
         gen_compute_eflags_c(s, s->T0);
         tcg_gen_neg_tl(s->T0, s->T0);
-        gen_op_mov_reg_v(MO_8, R_EAX, s->T0);
+        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
         break;
     case 0xe0: /* loopnz */
     case 0xe1: /* loopz */
@@ -7661,16 +7662,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             rm = (modrm & 7) | REX_B(s);
 
             if (mod == 3) {
-                gen_op_mov_v_reg(MO_32, s->T0, rm);
+                gen_op_mov_v_reg(s, MO_32, s->T0, rm);
                 /* sign extend */
                 if (d_ot == MO_64) {
                     tcg_gen_ext32s_tl(s->T0, s->T0);
                 }
-                gen_op_mov_reg_v(d_ot, reg, s->T0);
+                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
             } else {
                 gen_lea_modrm(env, s, modrm);
                 gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
-                gen_op_mov_reg_v(d_ot, reg, s->T0);
+                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
             }
         } else
 #endif
@@ -7694,10 +7695,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 a0 = tcg_temp_local_new();
                 tcg_gen_mov_tl(a0, s->A0);
             } else {
-                gen_op_mov_v_reg(ot, t0, rm);
+                gen_op_mov_v_reg(s, ot, t0, rm);
                 a0 = NULL;
             }
-            gen_op_mov_v_reg(ot, t1, reg);
+            gen_op_mov_v_reg(s, ot, t1, reg);
             tcg_gen_andi_tl(s->tmp0, t0, 3);
             tcg_gen_andi_tl(t1, t1, 3);
             tcg_gen_movi_tl(t2, 0);
@@ -7711,7 +7712,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_op_st_v(s, ot, t0, a0);
                 tcg_temp_free(a0);
            } else {
-                gen_op_mov_reg_v(ot, rm, t0);
+                gen_op_mov_reg_v(s, ot, rm, t0);
             }
             gen_compute_eflags(s);
             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
@@ -7742,7 +7743,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
             label1 = gen_new_label();
             tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
-            gen_op_mov_reg_v(ot, reg, t0);
+            gen_op_mov_reg_v(s, ot, reg, t0);
             gen_set_label(label1);
             set_cc_op(s, CC_OP_EFLAGS);
             tcg_temp_free(t0);
@@ -7996,7 +7997,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_start();
                     }
-                    gen_op_mov_v_reg(ot, s->T0, rm);
+                    gen_op_mov_v_reg(s, ot, s->T0, rm);
                     gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
                                          s->T0);
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
@@ -8009,7 +8010,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                         gen_io_start();
                     }
                     gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg));
-                    gen_op_mov_reg_v(ot, rm, s->T0);
+                    gen_op_mov_reg_v(s, ot, rm, s->T0);
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_end();
                     }
@@ -8042,7 +8043,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
             }
             if (b & 2) {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
-                gen_op_mov_v_reg(ot, s->T0, rm);
+                gen_op_mov_v_reg(s, ot, s->T0, rm);
                 tcg_gen_movi_i32(s->tmp2_i32, reg);
                 gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
                 gen_jmp_im(s, s->pc - s->cs_base);
@@ -8051,7 +8052,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
                 tcg_gen_movi_i32(s->tmp2_i32, reg);
                 gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
-                gen_op_mov_reg_v(ot, rm, s->T0);
+                gen_op_mov_reg_v(s, ot, rm, s->T0);
             }
         }
         break;
@@ -8313,7 +8314,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_extu(ot, s->T0);
         tcg_gen_mov_tl(cpu_cc_src, s->T0);
         tcg_gen_ctpop_tl(s->T0, s->T0);
-        gen_op_mov_reg_v(ot, reg, s->T0);
+        gen_op_mov_reg_v(s, ot, reg, s->T0);
 
         set_cc_op(s, CC_OP_POPCNT);
         break;
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [Qemu-devel] [PATCH v3 13/13] configure: enable mttcg for i386 and x86_64
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (11 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 12/13] target/i386: move x86_64_hregs " Emilio G. Cota
@ 2018-09-11 20:28 ` Emilio G. Cota
  2018-09-12 12:46 ` [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Paolo Bonzini
  13 siblings, 0 replies; 32+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:28 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 configure | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/configure b/configure
index 58862d2ae8..f715252c9f 100755
--- a/configure
+++ b/configure
@@ -7025,12 +7025,14 @@ TARGET_ABI_DIR=""
 
 case "$target_name" in
   i386)
+    mttcg="yes"
     gdb_xml_files="i386-32bit.xml i386-32bit-core.xml i386-32bit-sse.xml"
     target_compiler=$cross_cc_i386
     target_compiler_cflags=$cross_cc_ccflags_i386
   ;;
   x86_64)
     TARGET_BASE_ARCH=i386
+    mttcg="yes"
     gdb_xml_files="i386-64bit.xml i386-64bit-core.xml i386-64bit-sse.xml"
     target_compiler=$cross_cc_x86_64
   ;;
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 01/13] target/i386: move cpu_cc_srcT to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 01/13] target/i386: move cpu_cc_srcT to DisasContext Emilio G. Cota
@ 2018-09-11 20:44   ` Richard Henderson
  2018-09-13 14:21   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:44 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost, Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 32 ++++++++++++++++++--------------
>  1 file changed, 18 insertions(+), 14 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 02/13] target/i386: move cpu_A0 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 02/13] target/i386: move cpu_A0 " Emilio G. Cota
@ 2018-09-11 20:45   ` Richard Henderson
  2018-09-13 14:23   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:45 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost, Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 472 ++++++++++++++++++++--------------------
>  1 file changed, 236 insertions(+), 236 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 03/13] target/i386: move cpu_T0 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 03/13] target/i386: move cpu_T0 " Emilio G. Cota
@ 2018-09-11 20:47   ` Richard Henderson
  2018-09-13 14:25   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:47 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost, Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 1174 ++++++++++++++++++++-------------------
>  1 file changed, 594 insertions(+), 580 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 04/13] target/i386: move cpu_T1 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 04/13] target/i386: move cpu_T1 " Emilio G. Cota
@ 2018-09-11 20:48   ` Richard Henderson
  2018-09-13 14:26   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:48 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost, Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 341 ++++++++++++++++++++--------------------
>  1 file changed, 170 insertions(+), 171 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 05/13] target/i386: move cpu_tmp0 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 05/13] target/i386: move cpu_tmp0 " Emilio G. Cota
@ 2018-09-11 20:51   ` Richard Henderson
  0 siblings, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:51 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost, Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 282 ++++++++++++++++++++--------------------
>  1 file changed, 144 insertions(+), 138 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

I will note that these tmpN variables ought to be eliminated
completely, in favor of totally local temporary allocation.

But that should be done separately, because while they *ought*
to be local, in some cases it may be hard to see that they are.


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 06/13] target/i386: move cpu_tmp4 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 06/13] target/i386: move cpu_tmp4 " Emilio G. Cota
@ 2018-09-11 20:52   ` Richard Henderson
  0 siblings, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:52 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost, Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 78 ++++++++++++++++++++---------------------
>  1 file changed, 39 insertions(+), 39 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 07/13] target/i386: move cpu_ptr0 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 07/13] target/i386: move cpu_ptr0 " Emilio G. Cota
@ 2018-09-11 20:53   ` Richard Henderson
  0 siblings, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:53 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost, Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 101 +++++++++++++++++++++-------------------
>  1 file changed, 52 insertions(+), 49 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 08/13] target/i386: move cpu_ptr1 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 08/13] target/i386: move cpu_ptr1 " Emilio G. Cota
@ 2018-09-11 20:54   ` Richard Henderson
  0 siblings, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:54 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost, Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 52 ++++++++++++++++++++---------------------
>  1 file changed, 26 insertions(+), 26 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 09/13] target/i386: move cpu_tmp2_i32 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 09/13] target/i386: move cpu_tmp2_i32 " Emilio G. Cota
@ 2018-09-11 20:55   ` Richard Henderson
  0 siblings, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:55 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost, Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 347 ++++++++++++++++++++--------------------
>  1 file changed, 174 insertions(+), 173 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 10/13] target/i386: move cpu_tmp3_i32 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 10/13] target/i386: move cpu_tmp3_i32 " Emilio G. Cota
@ 2018-09-11 20:56   ` Richard Henderson
  0 siblings, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:56 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost, Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 64 ++++++++++++++++++++---------------------
>  1 file changed, 32 insertions(+), 32 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 11/13] target/i386: move cpu_tmp1_i64 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 11/13] target/i386: move cpu_tmp1_i64 " Emilio G. Cota
@ 2018-09-11 20:57   ` Richard Henderson
  0 siblings, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:57 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost, Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 160 ++++++++++++++++++++--------------------
>  1 file changed, 80 insertions(+), 80 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 12/13] target/i386: move x86_64_hregs to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 12/13] target/i386: move x86_64_hregs " Emilio G. Cota
@ 2018-09-11 20:58   ` Richard Henderson
  2018-09-13 14:31   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Richard Henderson @ 2018-09-11 20:58 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost, Alex Bennée

On 09/11/2018 01:28 PM, Emilio G. Cota wrote:
> And convert it to a bool to use an existing hole
> in the struct.
> 
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  target/i386/translate.c | 307 ++++++++++++++++++++--------------------
>  1 file changed, 154 insertions(+), 153 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg
  2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
                   ` (12 preceding siblings ...)
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 13/13] configure: enable mttcg for i386 and x86_64 Emilio G. Cota
@ 2018-09-12 12:46 ` Paolo Bonzini
  13 siblings, 0 replies; 32+ messages in thread
From: Paolo Bonzini @ 2018-09-12 12:46 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Peter Crosthwaite, Richard Henderson, Eduardo Habkost, Alex Bennée

On 11/09/2018 22:28, Emilio G. Cota wrote:
> v2: https://lists.gnu.org/archive/html/qemu-devel/2018-09/msg01122.html
> 
> Changes since v2:
> 
> - Add rth's R-b tag to the last patch
> - Drop v2's first 10 patches, since Paolo already picked those up
> - Move TCG temps + x86_64_hregs to DisasContext
>   + While at it, drop the cpu_ prefix from the TCG temps,
>     e.g. cpu_A0 -> s->A0
>   + Split the conversion into separate patches to ease review.
>     The patches are quite boring and long because the temps
>     are everywhere, and I had to add DisasContext *s to quite a few
>     functions
> 
> The series is checkpatch-clean.
> 
> You can fetch these patches from:
>   https://github.com/cota/qemu/tree/i386-mttcg-v3

Great, thanks!

Paolo

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 01/13] target/i386: move cpu_cc_srcT to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 01/13] target/i386: move cpu_cc_srcT to DisasContext Emilio G. Cota
  2018-09-11 20:44   ` Richard Henderson
@ 2018-09-13 14:21   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Alex Bennée @ 2018-09-13 14:21 UTC (permalink / raw)
  To: Emilio G. Cota
  Cc: qemu-devel, Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost


Emilio G. Cota <cota@braap.org> writes:

> Signed-off-by: Emilio G. Cota <cota@braap.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  target/i386/translate.c | 32 ++++++++++++++++++--------------
>  1 file changed, 18 insertions(+), 14 deletions(-)
>
> diff --git a/target/i386/translate.c b/target/i386/translate.c
> index 1f9d1d9b24..e9f512472e 100644
> --- a/target/i386/translate.c
> +++ b/target/i386/translate.c
> @@ -73,7 +73,7 @@
>
>  /* global register indexes */
>  static TCGv cpu_A0;
> -static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
> +static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
>  static TCGv_i32 cpu_cc_op;
>  static TCGv cpu_regs[CPU_NB_REGS];
>  static TCGv cpu_seg_base[6];
> @@ -135,6 +135,10 @@ typedef struct DisasContext {
>      int cpuid_ext3_features;
>      int cpuid_7_0_ebx_features;
>      int cpuid_xsave_features;
> +
> +    /* TCG local temps */
> +    TCGv cc_srcT;
> +
>      sigjmp_buf jmpbuf;
>  } DisasContext;
>
> @@ -244,7 +248,7 @@ static void set_cc_op(DisasContext *s, CCOp op)
>          tcg_gen_discard_tl(cpu_cc_src2);
>      }
>      if (dead & USES_CC_SRCT) {
> -        tcg_gen_discard_tl(cpu_cc_srcT);
> +        tcg_gen_discard_tl(s->cc_srcT);
>      }
>
>      if (op == CC_OP_DYNAMIC) {
> @@ -667,11 +671,11 @@ static inline void gen_op_testl_T0_T1_cc(void)
>      tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
>  }
>
> -static void gen_op_update_neg_cc(void)
> +static void gen_op_update_neg_cc(DisasContext *s)
>  {
>      tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
>      tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
> -    tcg_gen_movi_tl(cpu_cc_srcT, 0);
> +    tcg_gen_movi_tl(s->cc_srcT, 0);
>  }
>
>  /* compute all eflags to cc_src */
> @@ -742,7 +746,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
>          t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
>          /* If no temporary was used, be careful not to alias t1 and t0.  */
>          t0 = t1 == cpu_cc_src ? cpu_tmp0 : reg;
> -        tcg_gen_mov_tl(t0, cpu_cc_srcT);
> +        tcg_gen_mov_tl(t0, s->cc_srcT);
>          gen_extu(size, t0);
>          goto add_sub;
>
> @@ -899,7 +903,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
>          size = s->cc_op - CC_OP_SUBB;
>          switch (jcc_op) {
>          case JCC_BE:
> -            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
> +            tcg_gen_mov_tl(cpu_tmp4, s->cc_srcT);
>              gen_extu(size, cpu_tmp4);
>              t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
>              cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
> @@ -912,7 +916,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
>          case JCC_LE:
>              cond = TCG_COND_LE;
>          fast_jcc_l:
> -            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
> +            tcg_gen_mov_tl(cpu_tmp4, s->cc_srcT);
>              gen_exts(size, cpu_tmp4);
>              t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
>              cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
> @@ -1309,11 +1313,11 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>      case OP_SUBL:
>          if (s1->prefix & PREFIX_LOCK) {
>              tcg_gen_neg_tl(cpu_T0, cpu_T1);
> -            tcg_gen_atomic_fetch_add_tl(cpu_cc_srcT, cpu_A0, cpu_T0,
> +            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, cpu_A0, cpu_T0,
>                                          s1->mem_index, ot | MO_LE);
> -            tcg_gen_sub_tl(cpu_T0, cpu_cc_srcT, cpu_T1);
> +            tcg_gen_sub_tl(cpu_T0, s1->cc_srcT, cpu_T1);
>          } else {
> -            tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
> +            tcg_gen_mov_tl(s1->cc_srcT, cpu_T0);
>              tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> @@ -1356,7 +1360,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>          break;
>      case OP_CMPL:
>          tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
> -        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
> +        tcg_gen_mov_tl(s1->cc_srcT, cpu_T0);
>          tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
>          set_cc_op(s1, CC_OP_SUBB + ot);
>          break;
> @@ -4823,7 +4827,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      gen_op_mov_reg_v(ot, rm, cpu_T0);
>                  }
>              }
> -            gen_op_update_neg_cc();
> +            gen_op_update_neg_cc(s);
>              set_cc_op(s, CC_OP_SUBB + ot);
>              break;
>          case 4: /* mul */
> @@ -5283,7 +5287,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  }
>              }
>              tcg_gen_mov_tl(cpu_cc_src, oldv);
> -            tcg_gen_mov_tl(cpu_cc_srcT, cmpv);
> +            tcg_gen_mov_tl(s->cc_srcT, cmpv);
>              tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
>              set_cc_op(s, CC_OP_SUBB + ot);
>              tcg_temp_free(oldv);
> @@ -8463,7 +8467,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
>      cpu_tmp4 = tcg_temp_new();
>      cpu_ptr0 = tcg_temp_new_ptr();
>      cpu_ptr1 = tcg_temp_new_ptr();
> -    cpu_cc_srcT = tcg_temp_local_new();
> +    dc->cc_srcT = tcg_temp_local_new();
>  }
>
>  static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)


--
Alex Bennée

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 02/13] target/i386: move cpu_A0 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 02/13] target/i386: move cpu_A0 " Emilio G. Cota
  2018-09-11 20:45   ` Richard Henderson
@ 2018-09-13 14:23   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Alex Bennée @ 2018-09-13 14:23 UTC (permalink / raw)
  To: Emilio G. Cota
  Cc: qemu-devel, Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost


Emilio G. Cota <cota@braap.org> writes:

> Signed-off-by: Emilio G. Cota <cota@braap.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  target/i386/translate.c | 472 ++++++++++++++++++++--------------------
>  1 file changed, 236 insertions(+), 236 deletions(-)
>
> diff --git a/target/i386/translate.c b/target/i386/translate.c
> index e9f512472e..c6b1baab9d 100644
> --- a/target/i386/translate.c
> +++ b/target/i386/translate.c
> @@ -72,7 +72,6 @@
>  //#define MACRO_TEST   1
>
>  /* global register indexes */
> -static TCGv cpu_A0;
>  static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
>  static TCGv_i32 cpu_cc_op;
>  static TCGv cpu_regs[CPU_NB_REGS];
> @@ -138,6 +137,7 @@ typedef struct DisasContext {
>
>      /* TCG local temps */
>      TCGv cc_srcT;
> +    TCGv A0;
>
>      sigjmp_buf jmpbuf;
>  } DisasContext;
> @@ -395,9 +395,9 @@ static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
>
>  static void gen_add_A0_im(DisasContext *s, int val)
>  {
> -    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
> +    tcg_gen_addi_tl(s->A0, s->A0, val);
>      if (!CODE64(s)) {
> -        tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
> +        tcg_gen_ext32u_tl(s->A0, s->A0);
>      }
>  }
>
> @@ -431,7 +431,7 @@ static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
>  static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
>  {
>      if (d == OR_TMP0) {
> -        gen_op_st_v(s, idx, cpu_T0, cpu_A0);
> +        gen_op_st_v(s, idx, cpu_T0, s->A0);
>      } else {
>          gen_op_mov_reg_v(idx, d, cpu_T0);
>      }
> @@ -453,7 +453,7 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
>  #ifdef TARGET_X86_64
>      case MO_64:
>          if (ovr_seg < 0) {
> -            tcg_gen_mov_tl(cpu_A0, a0);
> +            tcg_gen_mov_tl(s->A0, a0);
>              return;
>          }
>          break;
> @@ -464,14 +464,14 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
>              ovr_seg = def_seg;
>          }
>          if (ovr_seg < 0) {
> -            tcg_gen_ext32u_tl(cpu_A0, a0);
> +            tcg_gen_ext32u_tl(s->A0, a0);
>              return;
>          }
>          break;
>      case MO_16:
>          /* 16 bit address */
> -        tcg_gen_ext16u_tl(cpu_A0, a0);
> -        a0 = cpu_A0;
> +        tcg_gen_ext16u_tl(s->A0, a0);
> +        a0 = s->A0;
>          if (ovr_seg < 0) {
>              if (s->addseg) {
>                  ovr_seg = def_seg;
> @@ -488,13 +488,13 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
>          TCGv seg = cpu_seg_base[ovr_seg];
>
>          if (aflag == MO_64) {
> -            tcg_gen_add_tl(cpu_A0, a0, seg);
> +            tcg_gen_add_tl(s->A0, a0, seg);
>          } else if (CODE64(s)) {
> -            tcg_gen_ext32u_tl(cpu_A0, a0);
> -            tcg_gen_add_tl(cpu_A0, cpu_A0, seg);
> +            tcg_gen_ext32u_tl(s->A0, a0);
> +            tcg_gen_add_tl(s->A0, s->A0, seg);
>          } else {
> -            tcg_gen_add_tl(cpu_A0, a0, seg);
> -            tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
> +            tcg_gen_add_tl(s->A0, a0, seg);
> +            tcg_gen_ext32u_tl(s->A0, s->A0);
>          }
>      }
>  }
> @@ -640,9 +640,9 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
>  static inline void gen_movs(DisasContext *s, TCGMemOp ot)
>  {
>      gen_string_movl_A0_ESI(s);
> -    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +    gen_op_ld_v(s, ot, cpu_T0, s->A0);
>      gen_string_movl_A0_EDI(s);
> -    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +    gen_op_st_v(s, ot, cpu_T0, s->A0);
>      gen_op_movl_T0_Dshift(ot);
>      gen_op_add_reg_T0(s->aflag, R_ESI);
>      gen_op_add_reg_T0(s->aflag, R_EDI);
> @@ -1072,7 +1072,7 @@ static inline void gen_stos(DisasContext *s, TCGMemOp ot)
>  {
>      gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
>      gen_string_movl_A0_EDI(s);
> -    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +    gen_op_st_v(s, ot, cpu_T0, s->A0);
>      gen_op_movl_T0_Dshift(ot);
>      gen_op_add_reg_T0(s->aflag, R_EDI);
>  }
> @@ -1080,7 +1080,7 @@ static inline void gen_stos(DisasContext *s, TCGMemOp ot)
>  static inline void gen_lods(DisasContext *s, TCGMemOp ot)
>  {
>      gen_string_movl_A0_ESI(s);
> -    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +    gen_op_ld_v(s, ot, cpu_T0, s->A0);
>      gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
>      gen_op_movl_T0_Dshift(ot);
>      gen_op_add_reg_T0(s->aflag, R_ESI);
> @@ -1089,7 +1089,7 @@ static inline void gen_lods(DisasContext *s, TCGMemOp ot)
>  static inline void gen_scas(DisasContext *s, TCGMemOp ot)
>  {
>      gen_string_movl_A0_EDI(s);
> -    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
> +    gen_op_ld_v(s, ot, cpu_T1, s->A0);
>      gen_op(s, OP_CMPL, ot, R_EAX);
>      gen_op_movl_T0_Dshift(ot);
>      gen_op_add_reg_T0(s->aflag, R_EDI);
> @@ -1098,7 +1098,7 @@ static inline void gen_scas(DisasContext *s, TCGMemOp ot)
>  static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
>  {
>      gen_string_movl_A0_EDI(s);
> -    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
> +    gen_op_ld_v(s, ot, cpu_T1, s->A0);
>      gen_string_movl_A0_ESI(s);
>      gen_op(s, OP_CMPL, ot, OR_TMP0);
>      gen_op_movl_T0_Dshift(ot);
> @@ -1128,11 +1128,11 @@ static inline void gen_ins(DisasContext *s, TCGMemOp ot)
>      /* Note: we must do this dummy write first to be restartable in
>         case of page fault. */
>      tcg_gen_movi_tl(cpu_T0, 0);
> -    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +    gen_op_st_v(s, ot, cpu_T0, s->A0);
>      tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
>      tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
>      gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
> -    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +    gen_op_st_v(s, ot, cpu_T0, s->A0);
>      gen_op_movl_T0_Dshift(ot);
>      gen_op_add_reg_T0(s->aflag, R_EDI);
>      gen_bpt_io(s, cpu_tmp2_i32, ot);
> @@ -1147,7 +1147,7 @@ static inline void gen_outs(DisasContext *s, TCGMemOp ot)
>          gen_io_start();
>      }
>      gen_string_movl_A0_ESI(s);
> -    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +    gen_op_ld_v(s, ot, cpu_T0, s->A0);
>
>      tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
>      tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
> @@ -1267,14 +1267,14 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>      if (d != OR_TMP0) {
>          gen_op_mov_v_reg(ot, cpu_T0, d);
>      } else if (!(s1->prefix & PREFIX_LOCK)) {
> -        gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
> +        gen_op_ld_v(s1, ot, cpu_T0, s1->A0);
>      }
>      switch(op) {
>      case OP_ADCL:
>          gen_compute_eflags_c(s1, cpu_tmp4);
>          if (s1->prefix & PREFIX_LOCK) {
>              tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1);
> -            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
> +            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
>              tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
> @@ -1289,7 +1289,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>          if (s1->prefix & PREFIX_LOCK) {
>              tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4);
>              tcg_gen_neg_tl(cpu_T0, cpu_T0);
> -            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
> +            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
>              tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
> @@ -1301,7 +1301,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>          break;
>      case OP_ADDL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
> +            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T1,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
>              tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
> @@ -1313,7 +1313,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>      case OP_SUBL:
>          if (s1->prefix & PREFIX_LOCK) {
>              tcg_gen_neg_tl(cpu_T0, cpu_T1);
> -            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, cpu_A0, cpu_T0,
> +            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, cpu_T0,
>                                          s1->mem_index, ot | MO_LE);
>              tcg_gen_sub_tl(cpu_T0, s1->cc_srcT, cpu_T1);
>          } else {
> @@ -1327,7 +1327,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>      default:
>      case OP_ANDL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_and_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
> +            tcg_gen_atomic_and_fetch_tl(cpu_T0, s1->A0, cpu_T1,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
>              tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
> @@ -1338,7 +1338,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>          break;
>      case OP_ORL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_or_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
> +            tcg_gen_atomic_or_fetch_tl(cpu_T0, s1->A0, cpu_T1,
>                                         s1->mem_index, ot | MO_LE);
>          } else {
>              tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
> @@ -1349,7 +1349,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>          break;
>      case OP_XORL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
> +            tcg_gen_atomic_xor_fetch_tl(cpu_T0, s1->A0, cpu_T1,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
>              tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
> @@ -1372,13 +1372,13 @@ static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
>  {
>      if (s1->prefix & PREFIX_LOCK) {
>          tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1);
> -        tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
> +        tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
>                                      s1->mem_index, ot | MO_LE);
>      } else {
>          if (d != OR_TMP0) {
>              gen_op_mov_v_reg(ot, cpu_T0, d);
>          } else {
> -            gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
> +            gen_op_ld_v(s1, ot, cpu_T0, s1->A0);
>          }
>          tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1));
>          gen_op_st_rm_T0_A0(s1, ot, d);
> @@ -1441,7 +1441,7 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>
>      /* load */
>      if (op1 == OR_TMP0) {
> -        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +        gen_op_ld_v(s, ot, cpu_T0, s->A0);
>      } else {
>          gen_op_mov_v_reg(ot, cpu_T0, op1);
>      }
> @@ -1477,7 +1477,7 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>
>      /* load */
>      if (op1 == OR_TMP0)
> -        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +        gen_op_ld_v(s, ot, cpu_T0, s->A0);
>      else
>          gen_op_mov_v_reg(ot, cpu_T0, op1);
>
> @@ -1517,7 +1517,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
>
>      /* load */
>      if (op1 == OR_TMP0) {
> -        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +        gen_op_ld_v(s, ot, cpu_T0, s->A0);
>      } else {
>          gen_op_mov_v_reg(ot, cpu_T0, op1);
>      }
> @@ -1603,7 +1603,7 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>
>      /* load */
>      if (op1 == OR_TMP0) {
> -        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +        gen_op_ld_v(s, ot, cpu_T0, s->A0);
>      } else {
>          gen_op_mov_v_reg(ot, cpu_T0, op1);
>      }
> @@ -1681,7 +1681,7 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>
>      /* load */
>      if (op1 == OR_TMP0)
> -        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +        gen_op_ld_v(s, ot, cpu_T0, s->A0);
>      else
>          gen_op_mov_v_reg(ot, cpu_T0, op1);
>
> @@ -1737,7 +1737,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>
>      /* load */
>      if (op1 == OR_TMP0) {
> -        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +        gen_op_ld_v(s, ot, cpu_T0, s->A0);
>      } else {
>          gen_op_mov_v_reg(ot, cpu_T0, op1);
>      }
> @@ -2052,7 +2052,7 @@ static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
>  }
>
>  /* Compute the address, with a minimum number of TCG ops.  */
> -static TCGv gen_lea_modrm_1(AddressParts a)
> +static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
>  {
>      TCGv ea = NULL;
>
> @@ -2060,22 +2060,22 @@ static TCGv gen_lea_modrm_1(AddressParts a)
>          if (a.scale == 0) {
>              ea = cpu_regs[a.index];
>          } else {
> -            tcg_gen_shli_tl(cpu_A0, cpu_regs[a.index], a.scale);
> -            ea = cpu_A0;
> +            tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
> +            ea = s->A0;
>          }
>          if (a.base >= 0) {
> -            tcg_gen_add_tl(cpu_A0, ea, cpu_regs[a.base]);
> -            ea = cpu_A0;
> +            tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
> +            ea = s->A0;
>          }
>      } else if (a.base >= 0) {
>          ea = cpu_regs[a.base];
>      }
>      if (!ea) {
> -        tcg_gen_movi_tl(cpu_A0, a.disp);
> -        ea = cpu_A0;
> +        tcg_gen_movi_tl(s->A0, a.disp);
> +        ea = s->A0;
>      } else if (a.disp != 0) {
> -        tcg_gen_addi_tl(cpu_A0, ea, a.disp);
> -        ea = cpu_A0;
> +        tcg_gen_addi_tl(s->A0, ea, a.disp);
> +        ea = s->A0;
>      }
>
>      return ea;
> @@ -2084,7 +2084,7 @@ static TCGv gen_lea_modrm_1(AddressParts a)
>  static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
>  {
>      AddressParts a = gen_lea_modrm_0(env, s, modrm);
> -    TCGv ea = gen_lea_modrm_1(a);
> +    TCGv ea = gen_lea_modrm_1(s, a);
>      gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
>  }
>
> @@ -2097,7 +2097,7 @@ static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
>  static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
>                        TCGCond cond, TCGv_i64 bndv)
>  {
> -    TCGv ea = gen_lea_modrm_1(gen_lea_modrm_0(env, s, modrm));
> +    TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
>
>      tcg_gen_extu_tl_i64(cpu_tmp1_i64, ea);
>      if (!CODE64(s)) {
> @@ -2111,7 +2111,7 @@ static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
>  /* used for LEA and MOV AX, mem */
>  static void gen_add_A0_ds_seg(DisasContext *s)
>  {
> -    gen_lea_v_seg(s, s->aflag, cpu_A0, R_DS, s->override);
> +    gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
>  }
>
>  /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
> @@ -2138,9 +2138,9 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
>          if (is_store) {
>              if (reg != OR_TMP0)
>                  gen_op_mov_v_reg(ot, cpu_T0, reg);
> -            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +            gen_op_st_v(s, ot, cpu_T0, s->A0);
>          } else {
> -            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +            gen_op_ld_v(s, ot, cpu_T0, s->A0);
>              if (reg != OR_TMP0)
>                  gen_op_mov_reg_v(ot, reg, cpu_T0);
>          }
> @@ -2334,19 +2334,19 @@ static void gen_push_v(DisasContext *s, TCGv val)
>      TCGMemOp d_ot = mo_pushpop(s, s->dflag);
>      TCGMemOp a_ot = mo_stacksize(s);
>      int size = 1 << d_ot;
> -    TCGv new_esp = cpu_A0;
> +    TCGv new_esp = s->A0;
>
> -    tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);
> +    tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
>
>      if (!CODE64(s)) {
>          if (s->addseg) {
>              new_esp = cpu_tmp4;
> -            tcg_gen_mov_tl(new_esp, cpu_A0);
> +            tcg_gen_mov_tl(new_esp, s->A0);
>          }
> -        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
> +        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
>      }
>
> -    gen_op_st_v(s, d_ot, val, cpu_A0);
> +    gen_op_st_v(s, d_ot, val, s->A0);
>      gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
>  }
>
> @@ -2356,7 +2356,7 @@ static TCGMemOp gen_pop_T0(DisasContext *s)
>      TCGMemOp d_ot = mo_pushpop(s, s->dflag);
>
>      gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
> -    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
> +    gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
>
>      return d_ot;
>  }
> @@ -2379,9 +2379,9 @@ static void gen_pusha(DisasContext *s)
>      int i;
>
>      for (i = 0; i < 8; i++) {
> -        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], (i - 8) * size);
> -        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
> -        gen_op_st_v(s, d_ot, cpu_regs[7 - i], cpu_A0);
> +        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
> +        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
> +        gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
>      }
>
>      gen_stack_update(s, -8 * size);
> @@ -2399,9 +2399,9 @@ static void gen_popa(DisasContext *s)
>          if (7 - i == R_ESP) {
>              continue;
>          }
> -        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], i * size);
> -        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
> -        gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
> +        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
> +        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
> +        gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
>          gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
>      }
>
> @@ -2417,7 +2417,7 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
>      /* Push BP; compute FrameTemp into T1.  */
>      tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
>      gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
> -    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], cpu_A0);
> +    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
>
>      level &= 31;
>      if (level != 0) {
> @@ -2425,19 +2425,19 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
>
>          /* Copy level-1 pointers from the previous frame.  */
>          for (i = 1; i < level; ++i) {
> -            tcg_gen_subi_tl(cpu_A0, cpu_regs[R_EBP], size * i);
> -            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
> -            gen_op_ld_v(s, d_ot, cpu_tmp0, cpu_A0);
> +            tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
> +            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
> +            gen_op_ld_v(s, d_ot, cpu_tmp0, s->A0);
>
> -            tcg_gen_subi_tl(cpu_A0, cpu_T1, size * i);
> -            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
> -            gen_op_st_v(s, d_ot, cpu_tmp0, cpu_A0);
> +            tcg_gen_subi_tl(s->A0, cpu_T1, size * i);
> +            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
> +            gen_op_st_v(s, d_ot, cpu_tmp0, s->A0);
>          }
>
>          /* Push the current FrameTemp as the last level.  */
> -        tcg_gen_subi_tl(cpu_A0, cpu_T1, size * level);
> -        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
> -        gen_op_st_v(s, d_ot, cpu_T1, cpu_A0);
> +        tcg_gen_subi_tl(s->A0, cpu_T1, size * level);
> +        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
> +        gen_op_st_v(s, d_ot, cpu_T1, s->A0);
>      }
>
>      /* Copy the FrameTemp value to EBP.  */
> @@ -2454,7 +2454,7 @@ static void gen_leave(DisasContext *s)
>      TCGMemOp a_ot = mo_stacksize(s);
>
>      gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
> -    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
> +    gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
>
>      tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
>
> @@ -2633,22 +2633,22 @@ static void gen_jmp(DisasContext *s, target_ulong eip)
>
>  static inline void gen_ldq_env_A0(DisasContext *s, int offset)
>  {
> -    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
> +    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
>      tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
>  }
>
>  static inline void gen_stq_env_A0(DisasContext *s, int offset)
>  {
>      tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
> -    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
> +    tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
>  }
>
>  static inline void gen_ldo_env_A0(DisasContext *s, int offset)
>  {
>      int mem_index = s->mem_index;
> -    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
> +    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, mem_index, MO_LEQ);
>      tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
> -    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
> +    tcg_gen_addi_tl(cpu_tmp0, s->A0, 8);
>      tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
>      tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
>  }
> @@ -2657,8 +2657,8 @@ static inline void gen_sto_env_A0(DisasContext *s, int offset)
>  {
>      int mem_index = s->mem_index;
>      tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
> -    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
> -    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
> +    tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, mem_index, MO_LEQ);
> +    tcg_gen_addi_tl(cpu_tmp0, s->A0, 8);
>      tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
>      tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
>  }
> @@ -3128,7 +3128,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>              } else {
>                  tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
>                      xmm_regs[reg].ZMM_L(0)));
> -                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
> +                gen_op_st_v(s, MO_32, cpu_T0, s->A0);
>              }
>              break;
>          case 0x6e: /* movd mm, ea */
> @@ -3193,7 +3193,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>          case 0x210: /* movss xmm, ea */
>              if (mod != 3) {
>                  gen_lea_modrm(env, s, modrm);
> -                gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
> +                gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
>                  tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
>                  tcg_gen_movi_tl(cpu_T0, 0);
>                  tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
> @@ -3380,7 +3380,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>              if (mod != 3) {
>                  gen_lea_modrm(env, s, modrm);
>                  tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
> -                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
> +                gen_op_st_v(s, MO_32, cpu_T0, s->A0);
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
>                  gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
> @@ -3555,7 +3555,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  if ((b >> 8) & 1) {
>                      gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
>                  } else {
> -                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
> +                    gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
>                      tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
>                  }
>                  op2_offset = offsetof(CPUX86State,xmm_t0);
> @@ -3694,13 +3694,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                          break;
>                      case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
>                      case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
> -                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUL);
>                          tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
>                                          offsetof(ZMMReg, ZMM_L(0)));
>                          break;
>                      case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
> -                        tcg_gen_qemu_ld_tl(cpu_tmp0, cpu_A0,
> +                        tcg_gen_qemu_ld_tl(cpu_tmp0, s->A0,
>                                             s->mem_index, MO_LEUW);
>                          tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
>                                          offsetof(ZMMReg, ZMM_W(0)));
> @@ -3789,11 +3789,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>
>                  gen_lea_modrm(env, s, modrm);
>                  if ((b & 1) == 0) {
> -                    tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
> +                    tcg_gen_qemu_ld_tl(cpu_T0, s->A0,
>                                         s->mem_index, ot | MO_BE);
>                      gen_op_mov_reg_v(ot, reg, cpu_T0);
>                  } else {
> -                    tcg_gen_qemu_st_tl(cpu_regs[reg], cpu_A0,
> +                    tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
>                                         s->mem_index, ot | MO_BE);
>                  }
>                  break;
> @@ -3825,23 +3825,23 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>                      /* Extract START, and shift the operand.
>                         Shifts larger than operand size get zeros.  */
> -                    tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
> -                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_A0);
> +                    tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
> +                    tcg_gen_shr_tl(cpu_T0, cpu_T0, s->A0);
>
>                      bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
>                      zero = tcg_const_tl(0);
> -                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, cpu_A0, bound,
> +                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, s->A0, bound,
>                                         cpu_T0, zero);
>                      tcg_temp_free(zero);
>
>                      /* Extract the LEN into a mask.  Lengths larger than
>                         operand size get all ones.  */
> -                    tcg_gen_extract_tl(cpu_A0, cpu_regs[s->vex_v], 8, 8);
> -                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound,
> -                                       cpu_A0, bound);
> +                    tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
> +                    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
> +                                       s->A0, bound);
>                      tcg_temp_free(bound);
>                      tcg_gen_movi_tl(cpu_T1, 1);
> -                    tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_A0);
> +                    tcg_gen_shl_tl(cpu_T1, cpu_T1, s->A0);
>                      tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
>                      tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
>
> @@ -3870,9 +3870,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                                         bound, bound, cpu_T1);
>                      tcg_temp_free(bound);
>                  }
> -                tcg_gen_movi_tl(cpu_A0, -1);
> -                tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T1);
> -                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_A0);
> +                tcg_gen_movi_tl(s->A0, -1);
> +                tcg_gen_shl_tl(s->A0, s->A0, cpu_T1);
> +                tcg_gen_andc_tl(cpu_T0, cpu_T0, s->A0);
>                  gen_op_mov_reg_v(ot, reg, cpu_T0);
>                  gen_op_update1_cc();
>                  set_cc_op(s, CC_OP_BMILGB + ot);
> @@ -4124,7 +4124,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      if (mod == 3) {
>                          gen_op_mov_reg_v(ot, rm, cpu_T0);
>                      } else {
> -                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
> +                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
>                                             s->mem_index, MO_UB);
>                      }
>                      break;
> @@ -4134,7 +4134,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      if (mod == 3) {
>                          gen_op_mov_reg_v(ot, rm, cpu_T0);
>                      } else {
> -                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
> +                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
>                                             s->mem_index, MO_LEUW);
>                      }
>                      break;
> @@ -4146,7 +4146,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                          if (mod == 3) {
>                              tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32);
>                          } else {
> -                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
> +                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
>                                                  s->mem_index, MO_LEUL);
>                          }
>                      } else { /* pextrq */
> @@ -4157,7 +4157,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                          if (mod == 3) {
>                              tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64);
>                          } else {
> -                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
> +                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0,
>                                                  s->mem_index, MO_LEQ);
>                          }
>  #else
> @@ -4171,7 +4171,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      if (mod == 3) {
>                          gen_op_mov_reg_v(ot, rm, cpu_T0);
>                      } else {
> -                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
> +                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
>                                             s->mem_index, MO_LEUL);
>                      }
>                      break;
> @@ -4179,7 +4179,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      if (mod == 3) {
>                          gen_op_mov_v_reg(MO_32, cpu_T0, rm);
>                      } else {
> -                        tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
> +                        tcg_gen_qemu_ld_tl(cpu_T0, s->A0,
>                                             s->mem_index, MO_UB);
>                      }
>                      tcg_gen_st8_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
> @@ -4191,7 +4191,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                                          offsetof(CPUX86State,xmm_regs[rm]
>                                                  .ZMM_L((val >> 6) & 3)));
>                      } else {
> -                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUL);
>                      }
>                      tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
> @@ -4219,7 +4219,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                          if (mod == 3) {
>                              tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[rm]);
>                          } else {
> -                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                                  s->mem_index, MO_LEUL);
>                          }
>                          tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
> @@ -4230,7 +4230,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                          if (mod == 3) {
>                              gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
>                          } else {
> -                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
> +                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
>                                                  s->mem_index, MO_LEQ);
>                          }
>                          tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
> @@ -4360,7 +4360,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  switch (sz) {
>                  case 2:
>                      /* 32 bit access */
> -                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
> +                    gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
>                      tcg_gen_st32_tl(cpu_T0, cpu_env,
>                                      offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
>                      break;
> @@ -4426,15 +4426,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>              /* maskmov : we must prepare A0 */
>              if (mod != 3)
>                  goto illegal_op;
> -            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EDI]);
> -            gen_extu(s->aflag, cpu_A0);
> +            tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
> +            gen_extu(s->aflag, s->A0);
>              gen_add_A0_ds_seg(s);
>
>              tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
>              tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
>              /* XXX: introduce a new table? */
>              sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
> -            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, cpu_A0);
> +            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, s->A0);
>              break;
>          default:
>              tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
> @@ -4673,7 +4673,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  rm = (modrm & 7) | REX_B(s);
>                  if (mod != 3) {
>                      gen_lea_modrm(env, s, modrm);
> -                    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
> +                    gen_op_ld_v(s, ot, cpu_T1, s->A0);
>                  } else if (op == OP_XORL && rm == reg) {
>                      goto xor_zero;
>                  } else {
> @@ -4760,7 +4760,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              /* For those below that handle locked memory, don't load here.  */
>              if (!(s->prefix & PREFIX_LOCK)
>                  || op != 2) {
> -                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +                gen_op_ld_v(s, ot, cpu_T0, s->A0);
>              }
>          } else {
>              gen_op_mov_v_reg(ot, cpu_T0, rm);
> @@ -4779,12 +4779,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      goto illegal_op;
>                  }
>                  tcg_gen_movi_tl(cpu_T0, ~0);
> -                tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
> +                tcg_gen_atomic_xor_fetch_tl(cpu_T0, s->A0, cpu_T0,
>                                              s->mem_index, ot | MO_LE);
>              } else {
>                  tcg_gen_not_tl(cpu_T0, cpu_T0);
>                  if (mod != 3) {
> -                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +                    gen_op_st_v(s, ot, cpu_T0, s->A0);
>                  } else {
>                      gen_op_mov_reg_v(ot, rm, cpu_T0);
>                  }
> @@ -4802,7 +4802,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  t0 = tcg_temp_local_new();
>                  label1 = gen_new_label();
>
> -                tcg_gen_mov_tl(a0, cpu_A0);
> +                tcg_gen_mov_tl(a0, s->A0);
>                  tcg_gen_mov_tl(t0, cpu_T0);
>
>                  gen_set_label(label1);
> @@ -4822,7 +4822,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              } else {
>                  tcg_gen_neg_tl(cpu_T0, cpu_T0);
>                  if (mod != 3) {
> -                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +                    gen_op_st_v(s, ot, cpu_T0, s->A0);
>                  } else {
>                      gen_op_mov_reg_v(ot, rm, cpu_T0);
>                  }
> @@ -5001,7 +5001,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          if (mod != 3) {
>              gen_lea_modrm(env, s, modrm);
>              if (op >= 2 && op != 3 && op != 5)
> -                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +                gen_op_ld_v(s, ot, cpu_T0, s->A0);
>          } else {
>              gen_op_mov_v_reg(ot, cpu_T0, rm);
>          }
> @@ -5034,9 +5034,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_jr(s, cpu_T0);
>              break;
>          case 3: /* lcall Ev */
> -            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
> +            gen_op_ld_v(s, ot, cpu_T1, s->A0);
>              gen_add_A0_im(s, 1 << ot);
> -            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
> +            gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
>          do_lcall:
>              if (s->pe && !s->vm86) {
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> @@ -5061,9 +5061,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_jr(s, cpu_T0);
>              break;
>          case 5: /* ljmp Ev */
> -            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
> +            gen_op_ld_v(s, ot, cpu_T1, s->A0);
>              gen_add_A0_im(s, 1 << ot);
> -            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
> +            gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
>          do_ljmp:
>              if (s->pe && !s->vm86) {
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> @@ -5225,13 +5225,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          } else {
>              gen_lea_modrm(env, s, modrm);
>              if (s->prefix & PREFIX_LOCK) {
> -                tcg_gen_atomic_fetch_add_tl(cpu_T1, cpu_A0, cpu_T0,
> +                tcg_gen_atomic_fetch_add_tl(cpu_T1, s->A0, cpu_T0,
>                                              s->mem_index, ot | MO_LE);
>                  tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
>              } else {
> -                gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
> +                gen_op_ld_v(s, ot, cpu_T1, s->A0);
>                  tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
> -                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +                gen_op_st_v(s, ot, cpu_T0, s->A0);
>              }
>              gen_op_mov_reg_v(ot, reg, cpu_T1);
>          }
> @@ -5258,7 +5258,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      goto illegal_op;
>                  }
>                  gen_lea_modrm(env, s, modrm);
> -                tcg_gen_atomic_cmpxchg_tl(oldv, cpu_A0, cmpv, newv,
> +                tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
>                                            s->mem_index, ot | MO_LE);
>                  gen_op_mov_reg_v(ot, R_EAX, oldv);
>              } else {
> @@ -5267,7 +5267,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      gen_op_mov_v_reg(ot, oldv, rm);
>                  } else {
>                      gen_lea_modrm(env, s, modrm);
> -                    gen_op_ld_v(s, ot, oldv, cpu_A0);
> +                    gen_op_ld_v(s, ot, oldv, s->A0);
>                      rm = 0; /* avoid warning */
>                  }
>                  gen_extu(ot, oldv);
> @@ -5282,7 +5282,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                         must be before changing accumulator to ensure
>                         idempotency if the store faults and the instruction
>                         is restarted */
> -                    gen_op_st_v(s, ot, newv, cpu_A0);
> +                    gen_op_st_v(s, ot, newv, s->A0);
>                      gen_op_mov_reg_v(ot, R_EAX, oldv);
>                  }
>              }
> @@ -5306,9 +5306,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  goto illegal_op;
>              gen_lea_modrm(env, s, modrm);
>              if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) {
> -                gen_helper_cmpxchg16b(cpu_env, cpu_A0);
> +                gen_helper_cmpxchg16b(cpu_env, s->A0);
>              } else {
> -                gen_helper_cmpxchg16b_unlocked(cpu_env, cpu_A0);
> +                gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
>              }
>          } else
>  #endif
> @@ -5317,9 +5317,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  goto illegal_op;
>              gen_lea_modrm(env, s, modrm);
>              if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) {
> -                gen_helper_cmpxchg8b(cpu_env, cpu_A0);
> +                gen_helper_cmpxchg8b(cpu_env, s->A0);
>              } else {
> -                gen_helper_cmpxchg8b_unlocked(cpu_env, cpu_A0);
> +                gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
>              }
>          }
>          set_cc_op(s, CC_OP_EFLAGS);
> @@ -5453,7 +5453,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          val = insn_get(env, s, ot);
>          tcg_gen_movi_tl(cpu_T0, val);
>          if (mod != 3) {
> -            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +            gen_op_st_v(s, ot, cpu_T0, s->A0);
>          } else {
>              gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T0);
>          }
> @@ -5540,7 +5540,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T0);
>              } else {
>                  gen_lea_modrm(env, s, modrm);
> -                gen_op_ld_v(s, s_ot, cpu_T0, cpu_A0);
> +                gen_op_ld_v(s, s_ot, cpu_T0, s->A0);
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T0);
>              }
>          }
> @@ -5554,9 +5554,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          reg = ((modrm >> 3) & 7) | rex_r;
>          {
>              AddressParts a = gen_lea_modrm_0(env, s, modrm);
> -            TCGv ea = gen_lea_modrm_1(a);
> +            TCGv ea = gen_lea_modrm_1(s, a);
>              gen_lea_v_seg(s, s->aflag, ea, -1, -1);
> -            gen_op_mov_reg_v(dflag, reg, cpu_A0);
> +            gen_op_mov_reg_v(dflag, reg, s->A0);
>          }
>          break;
>
> @@ -5578,24 +5578,24 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  offset_addr = insn_get(env, s, s->aflag);
>                  break;
>              }
> -            tcg_gen_movi_tl(cpu_A0, offset_addr);
> +            tcg_gen_movi_tl(s->A0, offset_addr);
>              gen_add_A0_ds_seg(s);
>              if ((b & 2) == 0) {
> -                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +                gen_op_ld_v(s, ot, cpu_T0, s->A0);
>                  gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
>              } else {
>                  gen_op_mov_v_reg(ot, cpu_T0, R_EAX);
> -                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +                gen_op_st_v(s, ot, cpu_T0, s->A0);
>              }
>          }
>          break;
>      case 0xd7: /* xlat */
> -        tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EBX]);
> +        tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
>          tcg_gen_ext8u_tl(cpu_T0, cpu_regs[R_EAX]);
> -        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T0);
> -        gen_extu(s->aflag, cpu_A0);
> +        tcg_gen_add_tl(s->A0, s->A0, cpu_T0);
> +        gen_extu(s->aflag, s->A0);
>          gen_add_A0_ds_seg(s);
> -        gen_op_ld_v(s, MO_8, cpu_T0, cpu_A0);
> +        gen_op_ld_v(s, MO_8, cpu_T0, s->A0);
>          gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
>          break;
>      case 0xb0 ... 0xb7: /* mov R, Ib */
> @@ -5646,7 +5646,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_lea_modrm(env, s, modrm);
>              gen_op_mov_v_reg(ot, cpu_T0, reg);
>              /* for xchg, lock is implicit */
> -            tcg_gen_atomic_xchg_tl(cpu_T1, cpu_A0, cpu_T0,
> +            tcg_gen_atomic_xchg_tl(cpu_T1, s->A0, cpu_T0,
>                                     s->mem_index, ot | MO_LE);
>              gen_op_mov_reg_v(ot, reg, cpu_T1);
>          }
> @@ -5675,10 +5675,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          if (mod == 3)
>              goto illegal_op;
>          gen_lea_modrm(env, s, modrm);
> -        gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
> +        gen_op_ld_v(s, ot, cpu_T1, s->A0);
>          gen_add_A0_im(s, 1 << ot);
>          /* load the segment first to handle exceptions properly */
> -        gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
> +        gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
>          gen_movl_seg_T0(s, op);
>          /* then put the data */
>          gen_op_mov_reg_v(ot, reg, cpu_T1);
> @@ -5798,23 +5798,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>
>                      switch(op >> 4) {
>                      case 0:
> -                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUL);
>                          gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32);
>                          break;
>                      case 1:
> -                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUL);
>                          gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
>                          break;
>                      case 2:
> -                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
> +                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
>                                              s->mem_index, MO_LEQ);
>                          gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64);
>                          break;
>                      case 3:
>                      default:
> -                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LESW);
>                          gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
>                          break;
> @@ -5837,23 +5837,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  case 0:
>                      switch(op >> 4) {
>                      case 0:
> -                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUL);
>                          gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32);
>                          break;
>                      case 1:
> -                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUL);
>                          gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
>                          break;
>                      case 2:
> -                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
> +                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0,
>                                              s->mem_index, MO_LEQ);
>                          gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64);
>                          break;
>                      case 3:
>                      default:
> -                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LESW);
>                          gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
>                          break;
> @@ -5864,18 +5864,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      switch(op >> 4) {
>                      case 1:
>                          gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env);
> -                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUL);
>                          break;
>                      case 2:
>                          gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env);
> -                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
> +                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0,
>                                              s->mem_index, MO_LEQ);
>                          break;
>                      case 3:
>                      default:
>                          gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env);
> -                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUW);
>                          break;
>                      }
> @@ -5885,23 +5885,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      switch(op >> 4) {
>                      case 0:
>                          gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env);
> -                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUL);
>                          break;
>                      case 1:
>                          gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env);
> -                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUL);
>                          break;
>                      case 2:
>                          gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env);
> -                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
> +                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0,
>                                              s->mem_index, MO_LEQ);
>                          break;
>                      case 3:
>                      default:
>                          gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env);
> -                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
> +                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
>                                              s->mem_index, MO_LEUW);
>                          break;
>                      }
> @@ -5911,53 +5911,53 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  }
>                  break;
>              case 0x0c: /* fldenv mem */
> -                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
> +                gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
>                  break;
>              case 0x0d: /* fldcw mem */
> -                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> +                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0,
>                                      s->mem_index, MO_LEUW);
>                  gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
>                  break;
>              case 0x0e: /* fnstenv mem */
> -                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
> +                gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
>                  break;
>              case 0x0f: /* fnstcw mem */
>                  gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
> -                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
> +                tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
>                                      s->mem_index, MO_LEUW);
>                  break;
>              case 0x1d: /* fldt mem */
> -                gen_helper_fldt_ST0(cpu_env, cpu_A0);
> +                gen_helper_fldt_ST0(cpu_env, s->A0);
>                  break;
>              case 0x1f: /* fstpt mem */
> -                gen_helper_fstt_ST0(cpu_env, cpu_A0);
> +                gen_helper_fstt_ST0(cpu_env, s->A0);
>                  gen_helper_fpop(cpu_env);
>                  break;
>              case 0x2c: /* frstor mem */
> -                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
> +                gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1));
>                  break;
>              case 0x2e: /* fnsave mem */
> -                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
> +                gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
>                  break;
>              case 0x2f: /* fnstsw mem */
>                  gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
> -                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
> +                tcg_gen_qemu_st_i32(cpu_tmp2_i32, s->A0,
>                                      s->mem_index, MO_LEUW);
>                  break;
>              case 0x3c: /* fbld */
> -                gen_helper_fbld_ST0(cpu_env, cpu_A0);
> +                gen_helper_fbld_ST0(cpu_env, s->A0);
>                  break;
>              case 0x3e: /* fbstp */
> -                gen_helper_fbst_ST0(cpu_env, cpu_A0);
> +                gen_helper_fbst_ST0(cpu_env, s->A0);
>                  gen_helper_fpop(cpu_env);
>                  break;
>              case 0x3d: /* fildll */
> -                tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
> +                tcg_gen_qemu_ld_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
>                  gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64);
>                  break;
>              case 0x3f: /* fistpll */
>                  gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env);
> -                tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
> +                tcg_gen_qemu_st_i64(cpu_tmp1_i64, s->A0, s->mem_index, MO_LEQ);
>                  gen_helper_fpop(cpu_env);
>                  break;
>              default:
> @@ -6471,13 +6471,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          } else {
>              gen_stack_A0(s);
>              /* pop offset */
> -            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
> +            gen_op_ld_v(s, dflag, cpu_T0, s->A0);
>              /* NOTE: keeping EIP updated is not a problem in case of
>                 exception */
>              gen_op_jmp_v(cpu_T0);
>              /* pop selector */
>              gen_add_A0_im(s, 1 << dflag);
> -            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
> +            gen_op_ld_v(s, dflag, cpu_T0, s->A0);
>              gen_op_movl_seg_T0_vm(R_CS);
>              /* add stack offset */
>              gen_stack_update(s, val + (2 << dflag));
> @@ -6732,7 +6732,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              s->rip_offset = 1;
>              gen_lea_modrm(env, s, modrm);
>              if (!(s->prefix & PREFIX_LOCK)) {
> -                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +                gen_op_ld_v(s, ot, cpu_T0, s->A0);
>              }
>          } else {
>              gen_op_mov_v_reg(ot, cpu_T0, rm);
> @@ -6768,10 +6768,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_exts(ot, cpu_T1);
>              tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
>              tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
> -            tcg_gen_add_tl(cpu_A0, gen_lea_modrm_1(a), cpu_tmp0);
> -            gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
> +            tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), cpu_tmp0);
> +            gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
>              if (!(s->prefix & PREFIX_LOCK)) {
> -                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +                gen_op_ld_v(s, ot, cpu_T0, s->A0);
>              }
>          } else {
>              gen_op_mov_v_reg(ot, cpu_T0, rm);
> @@ -6785,20 +6785,20 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              case 0: /* bt */
>                  /* Needs no atomic ops; we surpressed the normal
>                     memory load for LOCK above so do it now.  */
> -                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
> +                gen_op_ld_v(s, ot, cpu_T0, s->A0);
>                  break;
>              case 1: /* bts */
> -                tcg_gen_atomic_fetch_or_tl(cpu_T0, cpu_A0, cpu_tmp0,
> +                tcg_gen_atomic_fetch_or_tl(cpu_T0, s->A0, cpu_tmp0,
>                                             s->mem_index, ot | MO_LE);
>                  break;
>              case 2: /* btr */
>                  tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
> -                tcg_gen_atomic_fetch_and_tl(cpu_T0, cpu_A0, cpu_tmp0,
> +                tcg_gen_atomic_fetch_and_tl(cpu_T0, s->A0, cpu_tmp0,
>                                              s->mem_index, ot | MO_LE);
>                  break;
>              default:
>              case 3: /* btc */
> -                tcg_gen_atomic_fetch_xor_tl(cpu_T0, cpu_A0, cpu_tmp0,
> +                tcg_gen_atomic_fetch_xor_tl(cpu_T0, s->A0, cpu_tmp0,
>                                              s->mem_index, ot | MO_LE);
>                  break;
>              }
> @@ -6822,7 +6822,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              if (op != 0) {
>                  if (mod != 3) {
> -                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
> +                    gen_op_st_v(s, ot, cpu_T0, s->A0);
>                  } else {
>                      gen_op_mov_reg_v(ot, rm, cpu_T0);
>                  }
> @@ -7051,9 +7051,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          gen_lea_modrm(env, s, modrm);
>          tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
>          if (ot == MO_16) {
> -            gen_helper_boundw(cpu_env, cpu_A0, cpu_tmp2_i32);
> +            gen_helper_boundw(cpu_env, s->A0, cpu_tmp2_i32);
>          } else {
> -            gen_helper_boundl(cpu_env, cpu_A0, cpu_tmp2_i32);
> +            gen_helper_boundl(cpu_env, s->A0, cpu_tmp2_i32);
>          }
>          break;
>      case 0x1c8 ... 0x1cf: /* bswap reg */
> @@ -7293,13 +7293,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_lea_modrm(env, s, modrm);
>              tcg_gen_ld32u_tl(cpu_T0,
>                               cpu_env, offsetof(CPUX86State, gdt.limit));
> -            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
> +            gen_op_st_v(s, MO_16, cpu_T0, s->A0);
>              gen_add_A0_im(s, 2);
>              tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
>              if (dflag == MO_16) {
>                  tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
>              }
> -            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
> +            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
>              break;
>
>          case 0xc8: /* monitor */
> @@ -7308,10 +7308,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              gen_update_cc_op(s);
>              gen_jmp_im(pc_start - s->cs_base);
> -            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EAX]);
> -            gen_extu(s->aflag, cpu_A0);
> +            tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
> +            gen_extu(s->aflag, s->A0);
>              gen_add_A0_ds_seg(s);
> -            gen_helper_monitor(cpu_env, cpu_A0);
> +            gen_helper_monitor(cpu_env, s->A0);
>              break;
>
>          case 0xc9: /* mwait */
> @@ -7348,13 +7348,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
>              gen_lea_modrm(env, s, modrm);
>              tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit));
> -            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
> +            gen_op_st_v(s, MO_16, cpu_T0, s->A0);
>              gen_add_A0_im(s, 2);
>              tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
>              if (dflag == MO_16) {
>                  tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
>              }
> -            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
> +            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
>              break;
>
>          case 0xd0: /* xgetbv */
> @@ -7498,9 +7498,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
>              gen_lea_modrm(env, s, modrm);
> -            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
> +            gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
>              gen_add_A0_im(s, 2);
> -            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
> +            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
>              if (dflag == MO_16) {
>                  tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
>              }
> @@ -7515,9 +7515,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
>              gen_lea_modrm(env, s, modrm);
> -            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
> +            gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
>              gen_add_A0_im(s, 2);
> -            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
> +            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
>              if (dflag == MO_16) {
>                  tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
>              }
> @@ -7573,7 +7573,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_update_cc_op(s);
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_lea_modrm(env, s, modrm);
> -            gen_helper_invlpg(cpu_env, cpu_A0);
> +            gen_helper_invlpg(cpu_env, s->A0);
>              gen_jmp_im(s->pc - s->cs_base);
>              gen_eob(s);
>              break;
> @@ -7646,7 +7646,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T0);
>              } else {
>                  gen_lea_modrm(env, s, modrm);
> -                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, cpu_A0);
> +                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, s->A0);
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T0);
>              }
>          } else
> @@ -7667,9 +7667,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              rm = modrm & 7;
>              if (mod != 3) {
>                  gen_lea_modrm(env, s, modrm);
> -                gen_op_ld_v(s, ot, t0, cpu_A0);
> +                gen_op_ld_v(s, ot, t0, s->A0);
>                  a0 = tcg_temp_local_new();
> -                tcg_gen_mov_tl(a0, cpu_A0);
> +                tcg_gen_mov_tl(a0, s->A0);
>              } else {
>                  gen_op_mov_v_reg(ot, t0, rm);
>                  a0 = NULL;
> @@ -7785,16 +7785,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  } else {
>                      gen_lea_modrm(env, s, modrm);
>                      if (CODE64(s)) {
> -                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
> +                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
>                                              s->mem_index, MO_LEQ);
> -                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
> -                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
> +                        tcg_gen_addi_tl(s->A0, s->A0, 8);
> +                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
>                                              s->mem_index, MO_LEQ);
>                      } else {
> -                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
> +                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
>                                              s->mem_index, MO_LEUL);
> -                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
> -                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
> +                        tcg_gen_addi_tl(s->A0, s->A0, 4);
> +                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
>                                              s->mem_index, MO_LEUL);
>                      }
>                      /* bnd registers are now in-use */
> @@ -7810,22 +7810,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      goto illegal_op;
>                  }
>                  if (a.base >= 0) {
> -                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
> +                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
>                  } else {
> -                    tcg_gen_movi_tl(cpu_A0, 0);
> +                    tcg_gen_movi_tl(s->A0, 0);
>                  }
> -                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
> +                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
>                  if (a.index >= 0) {
>                      tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
>                  } else {
>                      tcg_gen_movi_tl(cpu_T0, 0);
>                  }
>                  if (CODE64(s)) {
> -                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, cpu_A0, cpu_T0);
> +                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, cpu_T0);
>                      tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
>                                     offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
>                  } else {
> -                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, cpu_A0, cpu_T0);
> +                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, cpu_T0);
>                      tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
>                      tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
>                  }
> @@ -7859,11 +7859,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      /* rip-relative generates #ud */
>                      goto illegal_op;
>                  }
> -                tcg_gen_not_tl(cpu_A0, gen_lea_modrm_1(a));
> +                tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
>                  if (!CODE64(s)) {
> -                    tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
> +                    tcg_gen_ext32u_tl(s->A0, s->A0);
>                  }
> -                tcg_gen_extu_tl_i64(cpu_bndu[reg], cpu_A0);
> +                tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
>                  /* bnd registers are now in-use */
>                  gen_set_hflag(s, HF_MPX_IU_MASK);
>                  break;
> @@ -7892,16 +7892,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  } else {
>                      gen_lea_modrm(env, s, modrm);
>                      if (CODE64(s)) {
> -                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
> +                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
>                                              s->mem_index, MO_LEQ);
> -                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
> -                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
> +                        tcg_gen_addi_tl(s->A0, s->A0, 8);
> +                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
>                                              s->mem_index, MO_LEQ);
>                      } else {
> -                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
> +                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
>                                              s->mem_index, MO_LEUL);
> -                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
> -                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
> +                        tcg_gen_addi_tl(s->A0, s->A0, 4);
> +                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
>                                              s->mem_index, MO_LEUL);
>                      }
>                  }
> @@ -7915,21 +7915,21 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      goto illegal_op;
>                  }
>                  if (a.base >= 0) {
> -                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
> +                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
>                  } else {
> -                    tcg_gen_movi_tl(cpu_A0, 0);
> +                    tcg_gen_movi_tl(s->A0, 0);
>                  }
> -                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
> +                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
>                  if (a.index >= 0) {
>                      tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
>                  } else {
>                      tcg_gen_movi_tl(cpu_T0, 0);
>                  }
>                  if (CODE64(s)) {
> -                    gen_helper_bndstx64(cpu_env, cpu_A0, cpu_T0,
> +                    gen_helper_bndstx64(cpu_env, s->A0, cpu_T0,
>                                          cpu_bndl[reg], cpu_bndu[reg]);
>                  } else {
> -                    gen_helper_bndstx32(cpu_env, cpu_A0, cpu_T0,
> +                    gen_helper_bndstx32(cpu_env, s->A0, cpu_T0,
>                                          cpu_bndl[reg], cpu_bndu[reg]);
>                  }
>              }
> @@ -8069,7 +8069,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  break;
>              }
>              gen_lea_modrm(env, s, modrm);
> -            gen_helper_fxsave(cpu_env, cpu_A0);
> +            gen_helper_fxsave(cpu_env, s->A0);
>              break;
>
>          CASE_MODRM_MEM_OP(1): /* fxrstor */
> @@ -8082,7 +8082,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  break;
>              }
>              gen_lea_modrm(env, s, modrm);
> -            gen_helper_fxrstor(cpu_env, cpu_A0);
> +            gen_helper_fxrstor(cpu_env, s->A0);
>              break;
>
>          CASE_MODRM_MEM_OP(2): /* ldmxcsr */
> @@ -8094,7 +8094,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  break;
>              }
>              gen_lea_modrm(env, s, modrm);
> -            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, s->mem_index, MO_LEUL);
> +            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, s->A0, s->mem_index, MO_LEUL);
>              gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
>              break;
>
> @@ -8108,7 +8108,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              gen_lea_modrm(env, s, modrm);
>              tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, mxcsr));
> -            gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
> +            gen_op_st_v(s, MO_32, cpu_T0, s->A0);
>              break;
>
>          CASE_MODRM_MEM_OP(4): /* xsave */
> @@ -8120,7 +8120,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_lea_modrm(env, s, modrm);
>              tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
>                                    cpu_regs[R_EDX]);
> -            gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64);
> +            gen_helper_xsave(cpu_env, s->A0, cpu_tmp1_i64);
>              break;
>
>          CASE_MODRM_MEM_OP(5): /* xrstor */
> @@ -8132,7 +8132,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_lea_modrm(env, s, modrm);
>              tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
>                                    cpu_regs[R_EDX]);
> -            gen_helper_xrstor(cpu_env, cpu_A0, cpu_tmp1_i64);
> +            gen_helper_xrstor(cpu_env, s->A0, cpu_tmp1_i64);
>              /* XRSTOR is how MPX is enabled, which changes how
>                 we translate.  Thus we need to end the TB.  */
>              gen_update_cc_op(s);
> @@ -8160,7 +8160,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  gen_lea_modrm(env, s, modrm);
>                  tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
>                                        cpu_regs[R_EDX]);
> -                gen_helper_xsaveopt(cpu_env, cpu_A0, cpu_tmp1_i64);
> +                gen_helper_xsaveopt(cpu_env, s->A0, cpu_tmp1_i64);
>              }
>              break;
>
> @@ -8458,7 +8458,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
>
>      cpu_T0 = tcg_temp_new();
>      cpu_T1 = tcg_temp_new();
> -    cpu_A0 = tcg_temp_new();
> +    dc->A0 = tcg_temp_new();
>
>      cpu_tmp0 = tcg_temp_new();
>      cpu_tmp1_i64 = tcg_temp_new_i64();


--
Alex Bennée

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 03/13] target/i386: move cpu_T0 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 03/13] target/i386: move cpu_T0 " Emilio G. Cota
  2018-09-11 20:47   ` Richard Henderson
@ 2018-09-13 14:25   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Alex Bennée @ 2018-09-13 14:25 UTC (permalink / raw)
  To: Emilio G. Cota
  Cc: qemu-devel, Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost


Emilio G. Cota <cota@braap.org> writes:

> Signed-off-by: Emilio G. Cota <cota@braap.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  target/i386/translate.c | 1174 ++++++++++++++++++++-------------------
>  1 file changed, 594 insertions(+), 580 deletions(-)
>
> diff --git a/target/i386/translate.c b/target/i386/translate.c
> index c6b1baab9d..73fd7e5b9a 100644
> --- a/target/i386/translate.c
> +++ b/target/i386/translate.c
> @@ -79,7 +79,7 @@ static TCGv cpu_seg_base[6];
>  static TCGv_i64 cpu_bndl[4];
>  static TCGv_i64 cpu_bndu[4];
>  /* local temps */
> -static TCGv cpu_T0, cpu_T1;
> +static TCGv cpu_T1;
>  /* local register indexes (only used inside old micro ops) */
>  static TCGv cpu_tmp0, cpu_tmp4;
>  static TCGv_ptr cpu_ptr0, cpu_ptr1;
> @@ -138,6 +138,7 @@ typedef struct DisasContext {
>      /* TCG local temps */
>      TCGv cc_srcT;
>      TCGv A0;
> +    TCGv T0;
>
>      sigjmp_buf jmpbuf;
>  } DisasContext;
> @@ -412,9 +413,9 @@ static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
>      gen_op_mov_reg_v(size, reg, cpu_tmp0);
>  }
>
> -static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
> +static inline void gen_op_add_reg_T0(DisasContext *s, TCGMemOp size, int reg)
>  {
> -    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T0);
> +    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], s->T0);
>      gen_op_mov_reg_v(size, reg, cpu_tmp0);
>  }
>
> @@ -431,9 +432,9 @@ static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
>  static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
>  {
>      if (d == OR_TMP0) {
> -        gen_op_st_v(s, idx, cpu_T0, s->A0);
> +        gen_op_st_v(s, idx, s->T0, s->A0);
>      } else {
> -        gen_op_mov_reg_v(idx, d, cpu_T0);
> +        gen_op_mov_reg_v(idx, d, s->T0);
>      }
>  }
>
> @@ -509,10 +510,10 @@ static inline void gen_string_movl_A0_EDI(DisasContext *s)
>      gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
>  }
>
> -static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
> +static inline void gen_op_movl_T0_Dshift(DisasContext *s, TCGMemOp ot)
>  {
> -    tcg_gen_ld32s_tl(cpu_T0, cpu_env, offsetof(CPUX86State, df));
> -    tcg_gen_shli_tl(cpu_T0, cpu_T0, ot);
> +    tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
> +    tcg_gen_shli_tl(s->T0, s->T0, ot);
>  };
>
>  static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
> @@ -610,7 +611,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
>      target_ulong next_eip;
>
>      if (s->pe && (s->cpl > s->iopl || s->vm86)) {
> -        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>          switch (ot) {
>          case MO_8:
>              gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
> @@ -630,7 +631,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
>          gen_jmp_im(cur_eip);
>          svm_flags |= (1 << (4 + ot));
>          next_eip = s->pc - s->cs_base;
> -        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>          gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
>                                  tcg_const_i32(svm_flags),
>                                  tcg_const_i32(next_eip - cur_eip));
> @@ -640,41 +641,41 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
>  static inline void gen_movs(DisasContext *s, TCGMemOp ot)
>  {
>      gen_string_movl_A0_ESI(s);
> -    gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +    gen_op_ld_v(s, ot, s->T0, s->A0);
>      gen_string_movl_A0_EDI(s);
> -    gen_op_st_v(s, ot, cpu_T0, s->A0);
> -    gen_op_movl_T0_Dshift(ot);
> -    gen_op_add_reg_T0(s->aflag, R_ESI);
> -    gen_op_add_reg_T0(s->aflag, R_EDI);
> +    gen_op_st_v(s, ot, s->T0, s->A0);
> +    gen_op_movl_T0_Dshift(s, ot);
> +    gen_op_add_reg_T0(s, s->aflag, R_ESI);
> +    gen_op_add_reg_T0(s, s->aflag, R_EDI);
>  }
>
> -static void gen_op_update1_cc(void)
> +static void gen_op_update1_cc(DisasContext *s)
>  {
> -    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> +    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>  }
>
> -static void gen_op_update2_cc(void)
> +static void gen_op_update2_cc(DisasContext *s)
>  {
>      tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
> -    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> +    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>  }
>
> -static void gen_op_update3_cc(TCGv reg)
> +static void gen_op_update3_cc(DisasContext *s, TCGv reg)
>  {
>      tcg_gen_mov_tl(cpu_cc_src2, reg);
>      tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
> -    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> +    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>  }
>
> -static inline void gen_op_testl_T0_T1_cc(void)
> +static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
>  {
> -    tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
> +    tcg_gen_and_tl(cpu_cc_dst, s->T0, cpu_T1);
>  }
>
>  static void gen_op_update_neg_cc(DisasContext *s)
>  {
> -    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -    tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
> +    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
> +    tcg_gen_neg_tl(cpu_cc_src, s->T0);
>      tcg_gen_movi_tl(s->cc_srcT, 0);
>  }
>
> @@ -1022,11 +1023,11 @@ static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
>     value 'b'. In the fast case, T0 is guaranted not to be used. */
>  static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
>  {
> -    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
> +    CCPrepare cc = gen_prepare_cc(s, b, s->T0);
>
>      if (cc.mask != -1) {
> -        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
> -        cc.reg = cpu_T0;
> +        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
> +        cc.reg = s->T0;
>      }
>      if (cc.use_reg2) {
>          tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
> @@ -1040,12 +1041,12 @@ static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
>     A translation block must end soon.  */
>  static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
>  {
> -    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
> +    CCPrepare cc = gen_prepare_cc(s, b, s->T0);
>
>      gen_update_cc_op(s);
>      if (cc.mask != -1) {
> -        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
> -        cc.reg = cpu_T0;
> +        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
> +        cc.reg = s->T0;
>      }
>      set_cc_op(s, CC_OP_DYNAMIC);
>      if (cc.use_reg2) {
> @@ -1070,20 +1071,20 @@ static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
>
>  static inline void gen_stos(DisasContext *s, TCGMemOp ot)
>  {
> -    gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
> +    gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
>      gen_string_movl_A0_EDI(s);
> -    gen_op_st_v(s, ot, cpu_T0, s->A0);
> -    gen_op_movl_T0_Dshift(ot);
> -    gen_op_add_reg_T0(s->aflag, R_EDI);
> +    gen_op_st_v(s, ot, s->T0, s->A0);
> +    gen_op_movl_T0_Dshift(s, ot);
> +    gen_op_add_reg_T0(s, s->aflag, R_EDI);
>  }
>
>  static inline void gen_lods(DisasContext *s, TCGMemOp ot)
>  {
>      gen_string_movl_A0_ESI(s);
> -    gen_op_ld_v(s, ot, cpu_T0, s->A0);
> -    gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
> -    gen_op_movl_T0_Dshift(ot);
> -    gen_op_add_reg_T0(s->aflag, R_ESI);
> +    gen_op_ld_v(s, ot, s->T0, s->A0);
> +    gen_op_mov_reg_v(ot, R_EAX, s->T0);
> +    gen_op_movl_T0_Dshift(s, ot);
> +    gen_op_add_reg_T0(s, s->aflag, R_ESI);
>  }
>
>  static inline void gen_scas(DisasContext *s, TCGMemOp ot)
> @@ -1091,8 +1092,8 @@ static inline void gen_scas(DisasContext *s, TCGMemOp ot)
>      gen_string_movl_A0_EDI(s);
>      gen_op_ld_v(s, ot, cpu_T1, s->A0);
>      gen_op(s, OP_CMPL, ot, R_EAX);
> -    gen_op_movl_T0_Dshift(ot);
> -    gen_op_add_reg_T0(s->aflag, R_EDI);
> +    gen_op_movl_T0_Dshift(s, ot);
> +    gen_op_add_reg_T0(s, s->aflag, R_EDI);
>  }
>
>  static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
> @@ -1101,9 +1102,9 @@ static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
>      gen_op_ld_v(s, ot, cpu_T1, s->A0);
>      gen_string_movl_A0_ESI(s);
>      gen_op(s, OP_CMPL, ot, OR_TMP0);
> -    gen_op_movl_T0_Dshift(ot);
> -    gen_op_add_reg_T0(s->aflag, R_ESI);
> -    gen_op_add_reg_T0(s->aflag, R_EDI);
> +    gen_op_movl_T0_Dshift(s, ot);
> +    gen_op_add_reg_T0(s, s->aflag, R_ESI);
> +    gen_op_add_reg_T0(s, s->aflag, R_EDI);
>  }
>
>  static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
> @@ -1127,14 +1128,14 @@ static inline void gen_ins(DisasContext *s, TCGMemOp ot)
>      gen_string_movl_A0_EDI(s);
>      /* Note: we must do this dummy write first to be restartable in
>         case of page fault. */
> -    tcg_gen_movi_tl(cpu_T0, 0);
> -    gen_op_st_v(s, ot, cpu_T0, s->A0);
> +    tcg_gen_movi_tl(s->T0, 0);
> +    gen_op_st_v(s, ot, s->T0, s->A0);
>      tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
>      tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
> -    gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
> -    gen_op_st_v(s, ot, cpu_T0, s->A0);
> -    gen_op_movl_T0_Dshift(ot);
> -    gen_op_add_reg_T0(s->aflag, R_EDI);
> +    gen_helper_in_func(ot, s->T0, cpu_tmp2_i32);
> +    gen_op_st_v(s, ot, s->T0, s->A0);
> +    gen_op_movl_T0_Dshift(s, ot);
> +    gen_op_add_reg_T0(s, s->aflag, R_EDI);
>      gen_bpt_io(s, cpu_tmp2_i32, ot);
>      if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>          gen_io_end();
> @@ -1147,14 +1148,14 @@ static inline void gen_outs(DisasContext *s, TCGMemOp ot)
>          gen_io_start();
>      }
>      gen_string_movl_A0_ESI(s);
> -    gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +    gen_op_ld_v(s, ot, s->T0, s->A0);
>
>      tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
>      tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
> -    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T0);
> +    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T0);
>      gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
> -    gen_op_movl_T0_Dshift(ot);
> -    gen_op_add_reg_T0(s->aflag, R_ESI);
> +    gen_op_movl_T0_Dshift(s, ot);
> +    gen_op_add_reg_T0(s, s->aflag, R_ESI);
>      gen_bpt_io(s, cpu_tmp2_i32, ot);
>      if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>          gen_io_end();
> @@ -1265,103 +1266,103 @@ static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
>  static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>  {
>      if (d != OR_TMP0) {
> -        gen_op_mov_v_reg(ot, cpu_T0, d);
> +        gen_op_mov_v_reg(ot, s1->T0, d);
>      } else if (!(s1->prefix & PREFIX_LOCK)) {
> -        gen_op_ld_v(s1, ot, cpu_T0, s1->A0);
> +        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
>      }
>      switch(op) {
>      case OP_ADCL:
>          gen_compute_eflags_c(s1, cpu_tmp4);
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1);
> -            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
> +            tcg_gen_add_tl(s1->T0, cpu_tmp4, cpu_T1);
> +            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
> -            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
> +            tcg_gen_add_tl(s1->T0, s1->T0, cpu_T1);
> +            tcg_gen_add_tl(s1->T0, s1->T0, cpu_tmp4);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> -        gen_op_update3_cc(cpu_tmp4);
> +        gen_op_update3_cc(s1, cpu_tmp4);
>          set_cc_op(s1, CC_OP_ADCB + ot);
>          break;
>      case OP_SBBL:
>          gen_compute_eflags_c(s1, cpu_tmp4);
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4);
> -            tcg_gen_neg_tl(cpu_T0, cpu_T0);
> -            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
> +            tcg_gen_add_tl(s1->T0, cpu_T1, cpu_tmp4);
> +            tcg_gen_neg_tl(s1->T0, s1->T0);
> +            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
> -            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
> +            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_T1);
> +            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_tmp4);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> -        gen_op_update3_cc(cpu_tmp4);
> +        gen_op_update3_cc(s1, cpu_tmp4);
>          set_cc_op(s1, CC_OP_SBBB + ot);
>          break;
>      case OP_ADDL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T1,
> +            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, cpu_T1,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
> +            tcg_gen_add_tl(s1->T0, s1->T0, cpu_T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> -        gen_op_update2_cc();
> +        gen_op_update2_cc(s1);
>          set_cc_op(s1, CC_OP_ADDB + ot);
>          break;
>      case OP_SUBL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_neg_tl(cpu_T0, cpu_T1);
> -            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, cpu_T0,
> +            tcg_gen_neg_tl(s1->T0, cpu_T1);
> +            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
>                                          s1->mem_index, ot | MO_LE);
> -            tcg_gen_sub_tl(cpu_T0, s1->cc_srcT, cpu_T1);
> +            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, cpu_T1);
>          } else {
> -            tcg_gen_mov_tl(s1->cc_srcT, cpu_T0);
> -            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
> +            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
> +            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> -        gen_op_update2_cc();
> +        gen_op_update2_cc(s1);
>          set_cc_op(s1, CC_OP_SUBB + ot);
>          break;
>      default:
>      case OP_ANDL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_and_fetch_tl(cpu_T0, s1->A0, cpu_T1,
> +            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, cpu_T1,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
> +            tcg_gen_and_tl(s1->T0, s1->T0, cpu_T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> -        gen_op_update1_cc();
> +        gen_op_update1_cc(s1);
>          set_cc_op(s1, CC_OP_LOGICB + ot);
>          break;
>      case OP_ORL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_or_fetch_tl(cpu_T0, s1->A0, cpu_T1,
> +            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, cpu_T1,
>                                         s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
> +            tcg_gen_or_tl(s1->T0, s1->T0, cpu_T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> -        gen_op_update1_cc();
> +        gen_op_update1_cc(s1);
>          set_cc_op(s1, CC_OP_LOGICB + ot);
>          break;
>      case OP_XORL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_xor_fetch_tl(cpu_T0, s1->A0, cpu_T1,
> +            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, cpu_T1,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
> +            tcg_gen_xor_tl(s1->T0, s1->T0, cpu_T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> -        gen_op_update1_cc();
> +        gen_op_update1_cc(s1);
>          set_cc_op(s1, CC_OP_LOGICB + ot);
>          break;
>      case OP_CMPL:
>          tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
> -        tcg_gen_mov_tl(s1->cc_srcT, cpu_T0);
> -        tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
> +        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
> +        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, cpu_T1);
>          set_cc_op(s1, CC_OP_SUBB + ot);
>          break;
>      }
> @@ -1371,21 +1372,21 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>  static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
>  {
>      if (s1->prefix & PREFIX_LOCK) {
> -        tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1);
> -        tcg_gen_atomic_add_fetch_tl(cpu_T0, s1->A0, cpu_T0,
> +        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
> +        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
>                                      s1->mem_index, ot | MO_LE);
>      } else {
>          if (d != OR_TMP0) {
> -            gen_op_mov_v_reg(ot, cpu_T0, d);
> +            gen_op_mov_v_reg(ot, s1->T0, d);
>          } else {
> -            gen_op_ld_v(s1, ot, cpu_T0, s1->A0);
> +            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
>          }
> -        tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1));
> +        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
>          gen_op_st_rm_T0_A0(s1, ot, d);
>      }
>
>      gen_compute_eflags_c(s1, cpu_cc_src);
> -    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> +    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
>      set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
>  }
>
> @@ -1441,9 +1442,9 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>
>      /* load */
>      if (op1 == OR_TMP0) {
> -        gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +        gen_op_ld_v(s, ot, s->T0, s->A0);
>      } else {
> -        gen_op_mov_v_reg(ot, cpu_T0, op1);
> +        gen_op_mov_v_reg(ot, s->T0, op1);
>      }
>
>      tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
> @@ -1451,23 +1452,23 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>
>      if (is_right) {
>          if (is_arith) {
> -            gen_exts(ot, cpu_T0);
> -            tcg_gen_sar_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
> -            tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
> +            gen_exts(ot, s->T0);
> +            tcg_gen_sar_tl(cpu_tmp0, s->T0, cpu_tmp0);
> +            tcg_gen_sar_tl(s->T0, s->T0, cpu_T1);
>          } else {
> -            gen_extu(ot, cpu_T0);
> -            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
> -            tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
> +            gen_extu(ot, s->T0);
> +            tcg_gen_shr_tl(cpu_tmp0, s->T0, cpu_tmp0);
> +            tcg_gen_shr_tl(s->T0, s->T0, cpu_T1);
>          }
>      } else {
> -        tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
> -        tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
> +        tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
> +        tcg_gen_shl_tl(s->T0, s->T0, cpu_T1);
>      }
>
>      /* store */
>      gen_op_st_rm_T0_A0(s, ot, op1);
>
> -    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, cpu_T1, is_right);
> +    gen_shift_flags(s, ot, s->T0, cpu_tmp0, cpu_T1, is_right);
>  }
>
>  static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
> @@ -1477,25 +1478,25 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>
>      /* load */
>      if (op1 == OR_TMP0)
> -        gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +        gen_op_ld_v(s, ot, s->T0, s->A0);
>      else
> -        gen_op_mov_v_reg(ot, cpu_T0, op1);
> +        gen_op_mov_v_reg(ot, s->T0, op1);
>
>      op2 &= mask;
>      if (op2 != 0) {
>          if (is_right) {
>              if (is_arith) {
> -                gen_exts(ot, cpu_T0);
> -                tcg_gen_sari_tl(cpu_tmp4, cpu_T0, op2 - 1);
> -                tcg_gen_sari_tl(cpu_T0, cpu_T0, op2);
> +                gen_exts(ot, s->T0);
> +                tcg_gen_sari_tl(cpu_tmp4, s->T0, op2 - 1);
> +                tcg_gen_sari_tl(s->T0, s->T0, op2);
>              } else {
> -                gen_extu(ot, cpu_T0);
> -                tcg_gen_shri_tl(cpu_tmp4, cpu_T0, op2 - 1);
> -                tcg_gen_shri_tl(cpu_T0, cpu_T0, op2);
> +                gen_extu(ot, s->T0);
> +                tcg_gen_shri_tl(cpu_tmp4, s->T0, op2 - 1);
> +                tcg_gen_shri_tl(s->T0, s->T0, op2);
>              }
>          } else {
> -            tcg_gen_shli_tl(cpu_tmp4, cpu_T0, op2 - 1);
> -            tcg_gen_shli_tl(cpu_T0, cpu_T0, op2);
> +            tcg_gen_shli_tl(cpu_tmp4, s->T0, op2 - 1);
> +            tcg_gen_shli_tl(s->T0, s->T0, op2);
>          }
>      }
>
> @@ -1505,7 +1506,7 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>      /* update eflags if non zero shift */
>      if (op2 != 0) {
>          tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
> -        tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> +        tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>          set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
>      }
>  }
> @@ -1517,9 +1518,9 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
>
>      /* load */
>      if (op1 == OR_TMP0) {
> -        gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +        gen_op_ld_v(s, ot, s->T0, s->A0);
>      } else {
> -        gen_op_mov_v_reg(ot, cpu_T0, op1);
> +        gen_op_mov_v_reg(ot, s->T0, op1);
>      }
>
>      tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
> @@ -1527,31 +1528,31 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
>      switch (ot) {
>      case MO_8:
>          /* Replicate the 8-bit input so that a 32-bit rotate works.  */
> -        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
> -        tcg_gen_muli_tl(cpu_T0, cpu_T0, 0x01010101);
> +        tcg_gen_ext8u_tl(s->T0, s->T0);
> +        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
>          goto do_long;
>      case MO_16:
>          /* Replicate the 16-bit input so that a 32-bit rotate works.  */
> -        tcg_gen_deposit_tl(cpu_T0, cpu_T0, cpu_T0, 16, 16);
> +        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
>          goto do_long;
>      do_long:
>  #ifdef TARGET_X86_64
>      case MO_32:
> -        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>          tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
>          if (is_right) {
>              tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
>          } else {
>              tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
>          }
> -        tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
> +        tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
>          break;
>  #endif
>      default:
>          if (is_right) {
> -            tcg_gen_rotr_tl(cpu_T0, cpu_T0, cpu_T1);
> +            tcg_gen_rotr_tl(s->T0, s->T0, cpu_T1);
>          } else {
> -            tcg_gen_rotl_tl(cpu_T0, cpu_T0, cpu_T1);
> +            tcg_gen_rotl_tl(s->T0, s->T0, cpu_T1);
>          }
>          break;
>      }
> @@ -1567,12 +1568,12 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
>         since we've computed the flags into CC_SRC, these variables are
>         currently dead.  */
>      if (is_right) {
> -        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
> -        tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
> +        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
> +        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
>          tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
>      } else {
> -        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
> -        tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
> +        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
> +        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
>      }
>      tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
>      tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
> @@ -1603,9 +1604,9 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>
>      /* load */
>      if (op1 == OR_TMP0) {
> -        gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +        gen_op_ld_v(s, ot, s->T0, s->A0);
>      } else {
> -        gen_op_mov_v_reg(ot, cpu_T0, op1);
> +        gen_op_mov_v_reg(ot, s->T0, op1);
>      }
>
>      op2 &= mask;
> @@ -1613,20 +1614,20 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>          switch (ot) {
>  #ifdef TARGET_X86_64
>          case MO_32:
> -            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>              if (is_right) {
>                  tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
>              } else {
>                  tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
>              }
> -            tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
> +            tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
>              break;
>  #endif
>          default:
>              if (is_right) {
> -                tcg_gen_rotri_tl(cpu_T0, cpu_T0, op2);
> +                tcg_gen_rotri_tl(s->T0, s->T0, op2);
>              } else {
> -                tcg_gen_rotli_tl(cpu_T0, cpu_T0, op2);
> +                tcg_gen_rotli_tl(s->T0, s->T0, op2);
>              }
>              break;
>          case MO_8:
> @@ -1639,10 +1640,10 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>              if (is_right) {
>                  shift = mask + 1 - shift;
>              }
> -            gen_extu(ot, cpu_T0);
> -            tcg_gen_shli_tl(cpu_tmp0, cpu_T0, shift);
> -            tcg_gen_shri_tl(cpu_T0, cpu_T0, mask + 1 - shift);
> -            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
> +            gen_extu(ot, s->T0);
> +            tcg_gen_shli_tl(cpu_tmp0, s->T0, shift);
> +            tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
> +            tcg_gen_or_tl(s->T0, s->T0, cpu_tmp0);
>              break;
>          }
>      }
> @@ -1659,12 +1660,12 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>             since we've computed the flags into CC_SRC, these variables are
>             currently dead.  */
>          if (is_right) {
> -            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
> -            tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
> +            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
> +            tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
>              tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
>          } else {
> -            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
> -            tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
> +            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
> +            tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
>          }
>          tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
>          tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
> @@ -1681,24 +1682,24 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>
>      /* load */
>      if (op1 == OR_TMP0)
> -        gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +        gen_op_ld_v(s, ot, s->T0, s->A0);
>      else
> -        gen_op_mov_v_reg(ot, cpu_T0, op1);
> +        gen_op_mov_v_reg(ot, s->T0, op1);
>
>      if (is_right) {
>          switch (ot) {
>          case MO_8:
> -            gen_helper_rcrb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
> +            gen_helper_rcrb(s->T0, cpu_env, s->T0, cpu_T1);
>              break;
>          case MO_16:
> -            gen_helper_rcrw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
> +            gen_helper_rcrw(s->T0, cpu_env, s->T0, cpu_T1);
>              break;
>          case MO_32:
> -            gen_helper_rcrl(cpu_T0, cpu_env, cpu_T0, cpu_T1);
> +            gen_helper_rcrl(s->T0, cpu_env, s->T0, cpu_T1);
>              break;
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            gen_helper_rcrq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
> +            gen_helper_rcrq(s->T0, cpu_env, s->T0, cpu_T1);
>              break;
>  #endif
>          default:
> @@ -1707,17 +1708,17 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>      } else {
>          switch (ot) {
>          case MO_8:
> -            gen_helper_rclb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
> +            gen_helper_rclb(s->T0, cpu_env, s->T0, cpu_T1);
>              break;
>          case MO_16:
> -            gen_helper_rclw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
> +            gen_helper_rclw(s->T0, cpu_env, s->T0, cpu_T1);
>              break;
>          case MO_32:
> -            gen_helper_rcll(cpu_T0, cpu_env, cpu_T0, cpu_T1);
> +            gen_helper_rcll(s->T0, cpu_env, s->T0, cpu_T1);
>              break;
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            gen_helper_rclq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
> +            gen_helper_rclq(s->T0, cpu_env, s->T0, cpu_T1);
>              break;
>  #endif
>          default:
> @@ -1737,9 +1738,9 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>
>      /* load */
>      if (op1 == OR_TMP0) {
> -        gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +        gen_op_ld_v(s, ot, s->T0, s->A0);
>      } else {
> -        gen_op_mov_v_reg(ot, cpu_T0, op1);
> +        gen_op_mov_v_reg(ot, s->T0, op1);
>      }
>
>      count = tcg_temp_new();
> @@ -1751,11 +1752,11 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>             This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
>             portion by constructing it as a 32-bit value.  */
>          if (is_right) {
> -            tcg_gen_deposit_tl(cpu_tmp0, cpu_T0, cpu_T1, 16, 16);
> -            tcg_gen_mov_tl(cpu_T1, cpu_T0);
> -            tcg_gen_mov_tl(cpu_T0, cpu_tmp0);
> +            tcg_gen_deposit_tl(cpu_tmp0, s->T0, cpu_T1, 16, 16);
> +            tcg_gen_mov_tl(cpu_T1, s->T0);
> +            tcg_gen_mov_tl(s->T0, cpu_tmp0);
>          } else {
> -            tcg_gen_deposit_tl(cpu_T1, cpu_T0, cpu_T1, 16, 16);
> +            tcg_gen_deposit_tl(cpu_T1, s->T0, cpu_T1, 16, 16);
>          }
>          /* FALLTHRU */
>  #ifdef TARGET_X86_64
> @@ -1763,28 +1764,28 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>          /* Concatenate the two 32-bit values and use a 64-bit shift.  */
>          tcg_gen_subi_tl(cpu_tmp0, count, 1);
>          if (is_right) {
> -            tcg_gen_concat_tl_i64(cpu_T0, cpu_T0, cpu_T1);
> -            tcg_gen_shr_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
> -            tcg_gen_shr_i64(cpu_T0, cpu_T0, count);
> +            tcg_gen_concat_tl_i64(s->T0, s->T0, cpu_T1);
> +            tcg_gen_shr_i64(cpu_tmp0, s->T0, cpu_tmp0);
> +            tcg_gen_shr_i64(s->T0, s->T0, count);
>          } else {
> -            tcg_gen_concat_tl_i64(cpu_T0, cpu_T1, cpu_T0);
> -            tcg_gen_shl_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
> -            tcg_gen_shl_i64(cpu_T0, cpu_T0, count);
> +            tcg_gen_concat_tl_i64(s->T0, cpu_T1, s->T0);
> +            tcg_gen_shl_i64(cpu_tmp0, s->T0, cpu_tmp0);
> +            tcg_gen_shl_i64(s->T0, s->T0, count);
>              tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
> -            tcg_gen_shri_i64(cpu_T0, cpu_T0, 32);
> +            tcg_gen_shri_i64(s->T0, s->T0, 32);
>          }
>          break;
>  #endif
>      default:
>          tcg_gen_subi_tl(cpu_tmp0, count, 1);
>          if (is_right) {
> -            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
> +            tcg_gen_shr_tl(cpu_tmp0, s->T0, cpu_tmp0);
>
>              tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
> -            tcg_gen_shr_tl(cpu_T0, cpu_T0, count);
> +            tcg_gen_shr_tl(s->T0, s->T0, count);
>              tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
>          } else {
> -            tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
> +            tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
>              if (ot == MO_16) {
>                  /* Only needed if count > 16, for Intel behaviour.  */
>                  tcg_gen_subfi_tl(cpu_tmp4, 33, count);
> @@ -1793,20 +1794,20 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>              }
>
>              tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
> -            tcg_gen_shl_tl(cpu_T0, cpu_T0, count);
> +            tcg_gen_shl_tl(s->T0, s->T0, count);
>              tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
>          }
>          tcg_gen_movi_tl(cpu_tmp4, 0);
>          tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
>                             cpu_tmp4, cpu_T1);
> -        tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
> +        tcg_gen_or_tl(s->T0, s->T0, cpu_T1);
>          break;
>      }
>
>      /* store */
>      gen_op_st_rm_T0_A0(s, ot, op1);
>
> -    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, count, is_right);
> +    gen_shift_flags(s, ot, s->T0, cpu_tmp0, count, is_right);
>      tcg_temp_free(count);
>  }
>
> @@ -2126,23 +2127,23 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
>      if (mod == 3) {
>          if (is_store) {
>              if (reg != OR_TMP0)
> -                gen_op_mov_v_reg(ot, cpu_T0, reg);
> -            gen_op_mov_reg_v(ot, rm, cpu_T0);
> +                gen_op_mov_v_reg(ot, s->T0, reg);
> +            gen_op_mov_reg_v(ot, rm, s->T0);
>          } else {
> -            gen_op_mov_v_reg(ot, cpu_T0, rm);
> +            gen_op_mov_v_reg(ot, s->T0, rm);
>              if (reg != OR_TMP0)
> -                gen_op_mov_reg_v(ot, reg, cpu_T0);
> +                gen_op_mov_reg_v(ot, reg, s->T0);
>          }
>      } else {
>          gen_lea_modrm(env, s, modrm);
>          if (is_store) {
>              if (reg != OR_TMP0)
> -                gen_op_mov_v_reg(ot, cpu_T0, reg);
> -            gen_op_st_v(s, ot, cpu_T0, s->A0);
> +                gen_op_mov_v_reg(ot, s->T0, reg);
> +            gen_op_st_v(s, ot, s->T0, s->A0);
>          } else {
> -            gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +            gen_op_ld_v(s, ot, s->T0, s->A0);
>              if (reg != OR_TMP0)
> -                gen_op_mov_reg_v(ot, reg, cpu_T0);
> +                gen_op_mov_reg_v(ot, reg, s->T0);
>          }
>      }
>  }
> @@ -2251,9 +2252,9 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
>          cc.reg2 = tcg_const_tl(cc.imm);
>      }
>
> -    tcg_gen_movcond_tl(cc.cond, cpu_T0, cc.reg, cc.reg2,
> -                       cpu_T0, cpu_regs[reg]);
> -    gen_op_mov_reg_v(ot, reg, cpu_T0);
> +    tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
> +                       s->T0, cpu_regs[reg]);
> +    gen_op_mov_reg_v(ot, reg, s->T0);
>
>      if (cc.mask != -1) {
>          tcg_temp_free(cc.reg);
> @@ -2263,18 +2264,18 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
>      }
>  }
>
> -static inline void gen_op_movl_T0_seg(int seg_reg)
> +static inline void gen_op_movl_T0_seg(DisasContext *s, int seg_reg)
>  {
> -    tcg_gen_ld32u_tl(cpu_T0, cpu_env,
> +    tcg_gen_ld32u_tl(s->T0, cpu_env,
>                       offsetof(CPUX86State,segs[seg_reg].selector));
>  }
>
> -static inline void gen_op_movl_seg_T0_vm(int seg_reg)
> +static inline void gen_op_movl_seg_T0_vm(DisasContext *s, int seg_reg)
>  {
> -    tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
> -    tcg_gen_st32_tl(cpu_T0, cpu_env,
> +    tcg_gen_ext16u_tl(s->T0, s->T0);
> +    tcg_gen_st32_tl(s->T0, cpu_env,
>                      offsetof(CPUX86State,segs[seg_reg].selector));
> -    tcg_gen_shli_tl(cpu_seg_base[seg_reg], cpu_T0, 4);
> +    tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
>  }
>
>  /* move T0 to seg_reg and compute if the CPU state may change. Never
> @@ -2282,7 +2283,7 @@ static inline void gen_op_movl_seg_T0_vm(int seg_reg)
>  static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
>  {
>      if (s->pe && !s->vm86) {
> -        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>          gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
>          /* abort translation because the addseg value may change or
>             because ss32 may change. For R_SS, translation must always
> @@ -2292,7 +2293,7 @@ static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
>              s->base.is_jmp = DISAS_TOO_MANY;
>          }
>      } else {
> -        gen_op_movl_seg_T0_vm(seg_reg);
> +        gen_op_movl_seg_T0_vm(s, seg_reg);
>          if (seg_reg == R_SS) {
>              s->base.is_jmp = DISAS_TOO_MANY;
>          }
> @@ -2356,7 +2357,7 @@ static TCGMemOp gen_pop_T0(DisasContext *s)
>      TCGMemOp d_ot = mo_pushpop(s, s->dflag);
>
>      gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
> -    gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
> +    gen_op_ld_v(s, d_ot, s->T0, s->A0);
>
>      return d_ot;
>  }
> @@ -2401,8 +2402,8 @@ static void gen_popa(DisasContext *s)
>          }
>          tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
>          gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
> -        gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
> -        gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
> +        gen_op_ld_v(s, d_ot, s->T0, s->A0);
> +        gen_op_mov_reg_v(d_ot, 7 - i, s->T0);
>      }
>
>      gen_stack_update(s, 8 * size);
> @@ -2454,11 +2455,11 @@ static void gen_leave(DisasContext *s)
>      TCGMemOp a_ot = mo_stacksize(s);
>
>      gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
> -    gen_op_ld_v(s, d_ot, cpu_T0, s->A0);
> +    gen_op_ld_v(s, d_ot, s->T0, s->A0);
>
>      tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
>
> -    gen_op_mov_reg_v(d_ot, R_EBP, cpu_T0);
> +    gen_op_mov_reg_v(d_ot, R_EBP, s->T0);
>      gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
>  }
>
> @@ -3126,23 +3127,24 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  gen_stq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].ZMM_Q(0)));
>              } else {
> -                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
> +                tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
>                      xmm_regs[reg].ZMM_L(0)));
> -                gen_op_st_v(s, MO_32, cpu_T0, s->A0);
> +                gen_op_st_v(s, MO_32, s->T0, s->A0);
>              }
>              break;
>          case 0x6e: /* movd mm, ea */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
>                  gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
> -                tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
> +                tcg_gen_st_tl(s->T0, cpu_env,
> +                              offsetof(CPUX86State, fpregs[reg].mmx));
>              } else
>  #endif
>              {
>                  gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
>                  tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                                   offsetof(CPUX86State,fpregs[reg].mmx));
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
>              }
>              break;
> @@ -3152,14 +3154,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
>                  tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                                   offsetof(CPUX86State,xmm_regs[reg]));
> -                gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T0);
> +                gen_helper_movq_mm_T0_xmm(cpu_ptr0, s->T0);
>              } else
>  #endif
>              {
>                  gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
>                  tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                                   offsetof(CPUX86State,xmm_regs[reg]));
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
>              }
>              break;
> @@ -3193,12 +3195,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>          case 0x210: /* movss xmm, ea */
>              if (mod != 3) {
>                  gen_lea_modrm(env, s, modrm);
> -                gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
> -                tcg_gen_movi_tl(cpu_T0, 0);
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
> +                gen_op_ld_v(s, MO_32, s->T0, s->A0);
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
> +                tcg_gen_movi_tl(s->T0, 0);
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
>                  gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
> @@ -3210,9 +3216,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  gen_lea_modrm(env, s, modrm);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].ZMM_Q(0)));
> -                tcg_gen_movi_tl(cpu_T0, 0);
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
> +                tcg_gen_movi_tl(s->T0, 0);
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
>                  gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
> @@ -3314,13 +3322,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>          case 0x7e: /* movd ea, mm */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                tcg_gen_ld_i64(cpu_T0, cpu_env,
> +                tcg_gen_ld_i64(s->T0, cpu_env,
>                                 offsetof(CPUX86State,fpregs[reg].mmx));
>                  gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
>              } else
>  #endif
>              {
> -                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
> +                tcg_gen_ld32u_tl(s->T0, cpu_env,
>                                   offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
>                  gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
>              }
> @@ -3328,13 +3336,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>          case 0x17e: /* movd ea, xmm */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                tcg_gen_ld_i64(cpu_T0, cpu_env,
> +                tcg_gen_ld_i64(s->T0, cpu_env,
>                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
>                  gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
>              } else
>  #endif
>              {
> -                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
> +                tcg_gen_ld32u_tl(s->T0, cpu_env,
>                                   offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
>                  gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
>              }
> @@ -3379,8 +3387,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>          case 0x211: /* movss ea, xmm */
>              if (mod != 3) {
>                  gen_lea_modrm(env, s, modrm);
> -                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
> -                gen_op_st_v(s, MO_32, cpu_T0, s->A0);
> +                tcg_gen_ld32u_tl(s->T0, cpu_env,
> +                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
> +                gen_op_st_v(s, MO_32, s->T0, s->A0);
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
>                  gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
> @@ -3429,16 +3438,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>              }
>              val = x86_ldub_code(env, s);
>              if (is_xmm) {
> -                tcg_gen_movi_tl(cpu_T0, val);
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
> -                tcg_gen_movi_tl(cpu_T0, 0);
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(1)));
> +                tcg_gen_movi_tl(s->T0, val);
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
> +                tcg_gen_movi_tl(s->T0, 0);
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
>                  op1_offset = offsetof(CPUX86State,xmm_t0);
>              } else {
> -                tcg_gen_movi_tl(cpu_T0, val);
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
> -                tcg_gen_movi_tl(cpu_T0, 0);
> -                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
> +                tcg_gen_movi_tl(s->T0, val);
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, mmx_t0.MMX_L(0)));
> +                tcg_gen_movi_tl(s->T0, 0);
> +                tcg_gen_st32_tl(s->T0, cpu_env,
> +                                offsetof(CPUX86State, mmx_t0.MMX_L(1)));
>                  op1_offset = offsetof(CPUX86State,mmx_t0);
>              }
>              sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
> @@ -3503,12 +3516,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>              tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
>              if (ot == MO_32) {
>                  SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
>              } else {
>  #ifdef TARGET_X86_64
>                  SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
> -                sse_fn_epl(cpu_env, cpu_ptr0, cpu_T0);
> +                sse_fn_epl(cpu_env, cpu_ptr0, s->T0);
>  #else
>                  goto illegal_op;
>  #endif
> @@ -3555,8 +3568,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  if ((b >> 8) & 1) {
>                      gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
>                  } else {
> -                    gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
> -                    tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
> +                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
> +                    tcg_gen_st32_tl(s->T0, cpu_env,
> +                                    offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
>                  }
>                  op2_offset = offsetof(CPUX86State,xmm_t0);
>              } else {
> @@ -3568,17 +3582,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  SSEFunc_i_ep sse_fn_i_ep =
>                      sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
>                  sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
> -                tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
> +                tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
>              } else {
>  #ifdef TARGET_X86_64
>                  SSEFunc_l_ep sse_fn_l_ep =
>                      sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
> -                sse_fn_l_ep(cpu_T0, cpu_env, cpu_ptr0);
> +                sse_fn_l_ep(s->T0, cpu_env, cpu_ptr0);
>  #else
>                  goto illegal_op;
>  #endif
>              }
> -            gen_op_mov_reg_v(ot, reg, cpu_T0);
> +            gen_op_mov_reg_v(ot, reg, s->T0);
>              break;
>          case 0xc4: /* pinsrw */
>          case 0x1c4:
> @@ -3587,11 +3601,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>              val = x86_ldub_code(env, s);
>              if (b1) {
>                  val &= 7;
> -                tcg_gen_st16_tl(cpu_T0, cpu_env,
> +                tcg_gen_st16_tl(s->T0, cpu_env,
>                                  offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
>              } else {
>                  val &= 3;
> -                tcg_gen_st16_tl(cpu_T0, cpu_env,
> +                tcg_gen_st16_tl(s->T0, cpu_env,
>                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
>              }
>              break;
> @@ -3604,16 +3618,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>              if (b1) {
>                  val &= 7;
>                  rm = (modrm & 7) | REX_B(s);
> -                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
> +                tcg_gen_ld16u_tl(s->T0, cpu_env,
>                                   offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
>              } else {
>                  val &= 3;
>                  rm = (modrm & 7);
> -                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
> +                tcg_gen_ld16u_tl(s->T0, cpu_env,
>                                  offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
>              }
>              reg = ((modrm >> 3) & 7) | rex_r;
> -            gen_op_mov_reg_v(ot, reg, cpu_T0);
> +            gen_op_mov_reg_v(ot, reg, s->T0);
>              break;
>          case 0x1d6: /* movq ea, xmm */
>              if (mod != 3) {
> @@ -3760,11 +3774,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
>                  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> -                gen_helper_crc32(cpu_T0, cpu_tmp2_i32,
> -                                 cpu_T0, tcg_const_i32(8 << ot));
> +                gen_helper_crc32(s->T0, cpu_tmp2_i32,
> +                                 s->T0, tcg_const_i32(8 << ot));
>
>                  ot = mo_64_32(s->dflag);
> -                gen_op_mov_reg_v(ot, reg, cpu_T0);
> +                gen_op_mov_reg_v(ot, reg, s->T0);
>                  break;
>
>              case 0x1f0: /* crc32 or movbe */
> @@ -3789,9 +3803,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>
>                  gen_lea_modrm(env, s, modrm);
>                  if ((b & 1) == 0) {
> -                    tcg_gen_qemu_ld_tl(cpu_T0, s->A0,
> +                    tcg_gen_qemu_ld_tl(s->T0, s->A0,
>                                         s->mem_index, ot | MO_BE);
> -                    gen_op_mov_reg_v(ot, reg, cpu_T0);
> +                    gen_op_mov_reg_v(ot, reg, s->T0);
>                  } else {
>                      tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
>                                         s->mem_index, ot | MO_BE);
> @@ -3806,9 +3820,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  }
>                  ot = mo_64_32(s->dflag);
>                  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> -                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_regs[s->vex_v]);
> -                gen_op_mov_reg_v(ot, reg, cpu_T0);
> -                gen_op_update1_cc();
> +                tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
> +                gen_op_mov_reg_v(ot, reg, s->T0);
> +                gen_op_update1_cc(s);
>                  set_cc_op(s, CC_OP_LOGICB + ot);
>                  break;
>
> @@ -3826,12 +3840,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      /* Extract START, and shift the operand.
>                         Shifts larger than operand size get zeros.  */
>                      tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
> -                    tcg_gen_shr_tl(cpu_T0, cpu_T0, s->A0);
> +                    tcg_gen_shr_tl(s->T0, s->T0, s->A0);
>
>                      bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
>                      zero = tcg_const_tl(0);
> -                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, s->A0, bound,
> -                                       cpu_T0, zero);
> +                    tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
> +                                       s->T0, zero);
>                      tcg_temp_free(zero);
>
>                      /* Extract the LEN into a mask.  Lengths larger than
> @@ -3843,10 +3857,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      tcg_gen_movi_tl(cpu_T1, 1);
>                      tcg_gen_shl_tl(cpu_T1, cpu_T1, s->A0);
>                      tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
> -                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
> +                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
>
> -                    gen_op_mov_reg_v(ot, reg, cpu_T0);
> -                    gen_op_update1_cc();
> +                    gen_op_mov_reg_v(ot, reg, s->T0);
> +                    gen_op_update1_cc(s);
>                      set_cc_op(s, CC_OP_LOGICB + ot);
>                  }
>                  break;
> @@ -3872,9 +3886,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  }
>                  tcg_gen_movi_tl(s->A0, -1);
>                  tcg_gen_shl_tl(s->A0, s->A0, cpu_T1);
> -                tcg_gen_andc_tl(cpu_T0, cpu_T0, s->A0);
> -                gen_op_mov_reg_v(ot, reg, cpu_T0);
> -                gen_op_update1_cc();
> +                tcg_gen_andc_tl(s->T0, s->T0, s->A0);
> +                gen_op_mov_reg_v(ot, reg, s->T0);
> +                gen_op_update1_cc(s);
>                  set_cc_op(s, CC_OP_BMILGB + ot);
>                  break;
>
> @@ -3888,7 +3902,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>                  switch (ot) {
>                  default:
> -                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                      tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
>                      tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
>                                        cpu_tmp2_i32, cpu_tmp3_i32);
> @@ -3897,9 +3911,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      break;
>  #ifdef TARGET_X86_64
>                  case MO_64:
> -                    tcg_gen_mulu2_i64(cpu_T0, cpu_T1,
> -                                      cpu_T0, cpu_regs[R_EDX]);
> -                    tcg_gen_mov_i64(cpu_regs[s->vex_v], cpu_T0);
> +                    tcg_gen_mulu2_i64(s->T0, cpu_T1,
> +                                      s->T0, cpu_regs[R_EDX]);
> +                    tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
>                      tcg_gen_mov_i64(cpu_regs[reg], cpu_T1);
>                      break;
>  #endif
> @@ -3921,7 +3935,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  } else {
>                      tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
>                  }
> -                gen_helper_pdep(cpu_regs[reg], cpu_T0, cpu_T1);
> +                gen_helper_pdep(cpu_regs[reg], s->T0, cpu_T1);
>                  break;
>
>              case 0x2f5: /* pext Gy, By, Ey */
> @@ -3939,7 +3953,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  } else {
>                      tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
>                  }
> -                gen_helper_pext(cpu_regs[reg], cpu_T0, cpu_T1);
> +                gen_helper_pext(cpu_regs[reg], s->T0, cpu_T1);
>                  break;
>
>              case 0x1f6: /* adcx Gy, Ey */
> @@ -3997,22 +4011,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                          /* If we know TL is 64-bit, and we want a 32-bit
>                             result, just do everything in 64-bit arithmetic.  */
>                          tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
> -                        tcg_gen_ext32u_i64(cpu_T0, cpu_T0);
> -                        tcg_gen_add_i64(cpu_T0, cpu_T0, cpu_regs[reg]);
> -                        tcg_gen_add_i64(cpu_T0, cpu_T0, carry_in);
> -                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T0);
> -                        tcg_gen_shri_i64(carry_out, cpu_T0, 32);
> +                        tcg_gen_ext32u_i64(s->T0, s->T0);
> +                        tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
> +                        tcg_gen_add_i64(s->T0, s->T0, carry_in);
> +                        tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
> +                        tcg_gen_shri_i64(carry_out, s->T0, 32);
>                          break;
>  #endif
>                      default:
>                          /* Otherwise compute the carry-out in two steps.  */
>                          zero = tcg_const_tl(0);
> -                        tcg_gen_add2_tl(cpu_T0, carry_out,
> -                                        cpu_T0, zero,
> +                        tcg_gen_add2_tl(s->T0, carry_out,
> +                                        s->T0, zero,
>                                          carry_in, zero);
>                          tcg_gen_add2_tl(cpu_regs[reg], carry_out,
>                                          cpu_regs[reg], carry_out,
> -                                        cpu_T0, zero);
> +                                        s->T0, zero);
>                          tcg_temp_free(zero);
>                          break;
>                      }
> @@ -4036,19 +4050,19 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 31);
>                  }
>                  if (b == 0x1f7) {
> -                    tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
> +                    tcg_gen_shl_tl(s->T0, s->T0, cpu_T1);
>                  } else if (b == 0x2f7) {
>                      if (ot != MO_64) {
> -                        tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
> +                        tcg_gen_ext32s_tl(s->T0, s->T0);
>                      }
> -                    tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
> +                    tcg_gen_sar_tl(s->T0, s->T0, cpu_T1);
>                  } else {
>                      if (ot != MO_64) {
> -                        tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
> +                        tcg_gen_ext32u_tl(s->T0, s->T0);
>                      }
> -                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
> +                    tcg_gen_shr_tl(s->T0, s->T0, cpu_T1);
>                  }
> -                gen_op_mov_reg_v(ot, reg, cpu_T0);
> +                gen_op_mov_reg_v(ot, reg, s->T0);
>                  break;
>
>              case 0x0f3:
> @@ -4063,25 +4077,25 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  ot = mo_64_32(s->dflag);
>                  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>
> -                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
> +                tcg_gen_mov_tl(cpu_cc_src, s->T0);
>                  switch (reg & 7) {
>                  case 1: /* blsr By,Ey */
> -                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
> -                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
> +                    tcg_gen_subi_tl(cpu_T1, s->T0, 1);
> +                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
>                      break;
>                  case 2: /* blsmsk By,Ey */
> -                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
> -                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
> +                    tcg_gen_subi_tl(cpu_T1, s->T0, 1);
> +                    tcg_gen_xor_tl(s->T0, s->T0, cpu_T1);
>                      break;
>                  case 3: /* blsi By, Ey */
> -                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
> -                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
> +                    tcg_gen_neg_tl(cpu_T1, s->T0);
> +                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
>                      break;
>                  default:
>                      goto unknown_op;
>                  }
> -                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -                gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
> +                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
> +                gen_op_mov_reg_v(ot, s->vex_v, s->T0);
>                  set_cc_op(s, CC_OP_BMILGB + ot);
>                  break;
>
> @@ -4119,22 +4133,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  val = x86_ldub_code(env, s);
>                  switch (b) {
>                  case 0x14: /* pextrb */
> -                    tcg_gen_ld8u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
> +                    tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
>                                              xmm_regs[reg].ZMM_B(val & 15)));
>                      if (mod == 3) {
> -                        gen_op_mov_reg_v(ot, rm, cpu_T0);
> +                        gen_op_mov_reg_v(ot, rm, s->T0);
>                      } else {
> -                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
> +                        tcg_gen_qemu_st_tl(s->T0, s->A0,
>                                             s->mem_index, MO_UB);
>                      }
>                      break;
>                  case 0x15: /* pextrw */
> -                    tcg_gen_ld16u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
> +                    tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
>                                              xmm_regs[reg].ZMM_W(val & 7)));
>                      if (mod == 3) {
> -                        gen_op_mov_reg_v(ot, rm, cpu_T0);
> +                        gen_op_mov_reg_v(ot, rm, s->T0);
>                      } else {
> -                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
> +                        tcg_gen_qemu_st_tl(s->T0, s->A0,
>                                             s->mem_index, MO_LEUW);
>                      }
>                      break;
> @@ -4166,23 +4180,23 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      }
>                      break;
>                  case 0x17: /* extractps */
> -                    tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
> +                    tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
>                                              xmm_regs[reg].ZMM_L(val & 3)));
>                      if (mod == 3) {
> -                        gen_op_mov_reg_v(ot, rm, cpu_T0);
> +                        gen_op_mov_reg_v(ot, rm, s->T0);
>                      } else {
> -                        tcg_gen_qemu_st_tl(cpu_T0, s->A0,
> +                        tcg_gen_qemu_st_tl(s->T0, s->A0,
>                                             s->mem_index, MO_LEUL);
>                      }
>                      break;
>                  case 0x20: /* pinsrb */
>                      if (mod == 3) {
> -                        gen_op_mov_v_reg(MO_32, cpu_T0, rm);
> +                        gen_op_mov_v_reg(MO_32, s->T0, rm);
>                      } else {
> -                        tcg_gen_qemu_ld_tl(cpu_T0, s->A0,
> +                        tcg_gen_qemu_ld_tl(s->T0, s->A0,
>                                             s->mem_index, MO_UB);
>                      }
> -                    tcg_gen_st8_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
> +                    tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
>                                              xmm_regs[reg].ZMM_B(val & 15)));
>                      break;
>                  case 0x21: /* insertps */
> @@ -4297,13 +4311,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>                  b = x86_ldub_code(env, s);
>                  if (ot == MO_64) {
> -                    tcg_gen_rotri_tl(cpu_T0, cpu_T0, b & 63);
> +                    tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
>                  } else {
> -                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                      tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
> -                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
> +                    tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
>                  }
> -                gen_op_mov_reg_v(ot, reg, cpu_T0);
> +                gen_op_mov_reg_v(ot, reg, s->T0);
>                  break;
>
>              default:
> @@ -4360,8 +4374,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  switch (sz) {
>                  case 2:
>                      /* 32 bit access */
> -                    gen_op_ld_v(s, MO_32, cpu_T0, s->A0);
> -                    tcg_gen_st32_tl(cpu_T0, cpu_env,
> +                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
> +                    tcg_gen_st32_tl(s->T0, cpu_env,
>                                      offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
>                      break;
>                  case 3:
> @@ -4657,8 +4671,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  xor_zero:
>                      /* xor reg, reg optimisation */
>                      set_cc_op(s, CC_OP_CLR);
> -                    tcg_gen_movi_tl(cpu_T0, 0);
> -                    gen_op_mov_reg_v(ot, reg, cpu_T0);
> +                    tcg_gen_movi_tl(s->T0, 0);
> +                    gen_op_mov_reg_v(ot, reg, s->T0);
>                      break;
>                  } else {
>                      opreg = rm;
> @@ -4760,17 +4774,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              /* For those below that handle locked memory, don't load here.  */
>              if (!(s->prefix & PREFIX_LOCK)
>                  || op != 2) {
> -                gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +                gen_op_ld_v(s, ot, s->T0, s->A0);
>              }
>          } else {
> -            gen_op_mov_v_reg(ot, cpu_T0, rm);
> +            gen_op_mov_v_reg(ot, s->T0, rm);
>          }
>
>          switch(op) {
>          case 0: /* test */
>              val = insn_get(env, s, ot);
>              tcg_gen_movi_tl(cpu_T1, val);
> -            gen_op_testl_T0_T1_cc();
> +            gen_op_testl_T0_T1_cc(s);
>              set_cc_op(s, CC_OP_LOGICB + ot);
>              break;
>          case 2: /* not */
> @@ -4778,15 +4792,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  if (mod == 3) {
>                      goto illegal_op;
>                  }
> -                tcg_gen_movi_tl(cpu_T0, ~0);
> -                tcg_gen_atomic_xor_fetch_tl(cpu_T0, s->A0, cpu_T0,
> +                tcg_gen_movi_tl(s->T0, ~0);
> +                tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
>                                              s->mem_index, ot | MO_LE);
>              } else {
> -                tcg_gen_not_tl(cpu_T0, cpu_T0);
> +                tcg_gen_not_tl(s->T0, s->T0);
>                  if (mod != 3) {
> -                    gen_op_st_v(s, ot, cpu_T0, s->A0);
> +                    gen_op_st_v(s, ot, s->T0, s->A0);
>                  } else {
> -                    gen_op_mov_reg_v(ot, rm, cpu_T0);
> +                    gen_op_mov_reg_v(ot, rm, s->T0);
>                  }
>              }
>              break;
> @@ -4803,7 +4817,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  label1 = gen_new_label();
>
>                  tcg_gen_mov_tl(a0, s->A0);
> -                tcg_gen_mov_tl(t0, cpu_T0);
> +                tcg_gen_mov_tl(t0, s->T0);
>
>                  gen_set_label(label1);
>                  t1 = tcg_temp_new();
> @@ -4817,14 +4831,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>
>                  tcg_temp_free(t2);
>                  tcg_temp_free(a0);
> -                tcg_gen_mov_tl(cpu_T0, t0);
> +                tcg_gen_mov_tl(s->T0, t0);
>                  tcg_temp_free(t0);
>              } else {
> -                tcg_gen_neg_tl(cpu_T0, cpu_T0);
> +                tcg_gen_neg_tl(s->T0, s->T0);
>                  if (mod != 3) {
> -                    gen_op_st_v(s, ot, cpu_T0, s->A0);
> +                    gen_op_st_v(s, ot, s->T0, s->A0);
>                  } else {
> -                    gen_op_mov_reg_v(ot, rm, cpu_T0);
> +                    gen_op_mov_reg_v(ot, rm, s->T0);
>                  }
>              }
>              gen_op_update_neg_cc(s);
> @@ -4834,31 +4848,31 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              switch(ot) {
>              case MO_8:
>                  gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
> -                tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
> +                tcg_gen_ext8u_tl(s->T0, s->T0);
>                  tcg_gen_ext8u_tl(cpu_T1, cpu_T1);
>                  /* XXX: use 32 bit mul which could be faster */
> -                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
> -                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
> -                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -                tcg_gen_andi_tl(cpu_cc_src, cpu_T0, 0xff00);
> +                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
> +                tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
>                  set_cc_op(s, CC_OP_MULB);
>                  break;
>              case MO_16:
>                  gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
> -                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
> +                tcg_gen_ext16u_tl(s->T0, s->T0);
>                  tcg_gen_ext16u_tl(cpu_T1, cpu_T1);
>                  /* XXX: use 32 bit mul which could be faster */
> -                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
> -                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
> -                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
> -                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
> -                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
> +                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
> +                tcg_gen_shri_tl(s->T0, s->T0, 16);
> +                gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
> +                tcg_gen_mov_tl(cpu_cc_src, s->T0);
>                  set_cc_op(s, CC_OP_MULW);
>                  break;
>              default:
>              case MO_32:
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
>                  tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
>                                    cpu_tmp2_i32, cpu_tmp3_i32);
> @@ -4871,7 +4885,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>  #ifdef TARGET_X86_64
>              case MO_64:
>                  tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
> -                                  cpu_T0, cpu_regs[R_EAX]);
> +                                  s->T0, cpu_regs[R_EAX]);
>                  tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
>                  tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
>                  set_cc_op(s, CC_OP_MULQ);
> @@ -4883,33 +4897,33 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              switch(ot) {
>              case MO_8:
>                  gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
> -                tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
> +                tcg_gen_ext8s_tl(s->T0, s->T0);
>                  tcg_gen_ext8s_tl(cpu_T1, cpu_T1);
>                  /* XXX: use 32 bit mul which could be faster */
> -                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
> -                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
> -                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T0);
> -                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
> +                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
> +                tcg_gen_ext8s_tl(cpu_tmp0, s->T0);
> +                tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
>                  set_cc_op(s, CC_OP_MULB);
>                  break;
>              case MO_16:
>                  gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
> -                tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
> +                tcg_gen_ext16s_tl(s->T0, s->T0);
>                  tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
>                  /* XXX: use 32 bit mul which could be faster */
> -                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
> -                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
> -                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
> -                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
> -                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
> -                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
> +                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
> +                tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
> +                tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
> +                tcg_gen_shri_tl(s->T0, s->T0, 16);
> +                gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
>                  set_cc_op(s, CC_OP_MULW);
>                  break;
>              default:
>              case MO_32:
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
>                  tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
>                                    cpu_tmp2_i32, cpu_tmp3_i32);
> @@ -4924,7 +4938,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>  #ifdef TARGET_X86_64
>              case MO_64:
>                  tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
> -                                  cpu_T0, cpu_regs[R_EAX]);
> +                                  s->T0, cpu_regs[R_EAX]);
>                  tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
>                  tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
>                  tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
> @@ -4936,18 +4950,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          case 6: /* div */
>              switch(ot) {
>              case MO_8:
> -                gen_helper_divb_AL(cpu_env, cpu_T0);
> +                gen_helper_divb_AL(cpu_env, s->T0);
>                  break;
>              case MO_16:
> -                gen_helper_divw_AX(cpu_env, cpu_T0);
> +                gen_helper_divw_AX(cpu_env, s->T0);
>                  break;
>              default:
>              case MO_32:
> -                gen_helper_divl_EAX(cpu_env, cpu_T0);
> +                gen_helper_divl_EAX(cpu_env, s->T0);
>                  break;
>  #ifdef TARGET_X86_64
>              case MO_64:
> -                gen_helper_divq_EAX(cpu_env, cpu_T0);
> +                gen_helper_divq_EAX(cpu_env, s->T0);
>                  break;
>  #endif
>              }
> @@ -4955,18 +4969,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          case 7: /* idiv */
>              switch(ot) {
>              case MO_8:
> -                gen_helper_idivb_AL(cpu_env, cpu_T0);
> +                gen_helper_idivb_AL(cpu_env, s->T0);
>                  break;
>              case MO_16:
> -                gen_helper_idivw_AX(cpu_env, cpu_T0);
> +                gen_helper_idivw_AX(cpu_env, s->T0);
>                  break;
>              default:
>              case MO_32:
> -                gen_helper_idivl_EAX(cpu_env, cpu_T0);
> +                gen_helper_idivl_EAX(cpu_env, s->T0);
>                  break;
>  #ifdef TARGET_X86_64
>              case MO_64:
> -                gen_helper_idivq_EAX(cpu_env, cpu_T0);
> +                gen_helper_idivq_EAX(cpu_env, s->T0);
>                  break;
>  #endif
>              }
> @@ -5001,9 +5015,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          if (mod != 3) {
>              gen_lea_modrm(env, s, modrm);
>              if (op >= 2 && op != 3 && op != 5)
> -                gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +                gen_op_ld_v(s, ot, s->T0, s->A0);
>          } else {
> -            gen_op_mov_v_reg(ot, cpu_T0, rm);
> +            gen_op_mov_v_reg(ot, s->T0, rm);
>          }
>
>          switch(op) {
> @@ -5024,27 +5038,27 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          case 2: /* call Ev */
>              /* XXX: optimize if memory (no 'and' is necessary) */
>              if (dflag == MO_16) {
> -                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
> +                tcg_gen_ext16u_tl(s->T0, s->T0);
>              }
>              next_eip = s->pc - s->cs_base;
>              tcg_gen_movi_tl(cpu_T1, next_eip);
>              gen_push_v(s, cpu_T1);
> -            gen_op_jmp_v(cpu_T0);
> +            gen_op_jmp_v(s->T0);
>              gen_bnd_jmp(s);
> -            gen_jr(s, cpu_T0);
> +            gen_jr(s, s->T0);
>              break;
>          case 3: /* lcall Ev */
>              gen_op_ld_v(s, ot, cpu_T1, s->A0);
>              gen_add_A0_im(s, 1 << ot);
> -            gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
> +            gen_op_ld_v(s, MO_16, s->T0, s->A0);
>          do_lcall:
>              if (s->pe && !s->vm86) {
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
>                                             tcg_const_i32(dflag - 1),
>                                             tcg_const_tl(s->pc - s->cs_base));
>              } else {
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T1,
>                                        tcg_const_i32(dflag - 1),
>                                        tcg_const_i32(s->pc - s->cs_base));
> @@ -5054,30 +5068,30 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              break;
>          case 4: /* jmp Ev */
>              if (dflag == MO_16) {
> -                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
> +                tcg_gen_ext16u_tl(s->T0, s->T0);
>              }
> -            gen_op_jmp_v(cpu_T0);
> +            gen_op_jmp_v(s->T0);
>              gen_bnd_jmp(s);
> -            gen_jr(s, cpu_T0);
> +            gen_jr(s, s->T0);
>              break;
>          case 5: /* ljmp Ev */
>              gen_op_ld_v(s, ot, cpu_T1, s->A0);
>              gen_add_A0_im(s, 1 << ot);
> -            gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
> +            gen_op_ld_v(s, MO_16, s->T0, s->A0);
>          do_ljmp:
>              if (s->pe && !s->vm86) {
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
>                                            tcg_const_tl(s->pc - s->cs_base));
>              } else {
> -                gen_op_movl_seg_T0_vm(R_CS);
> +                gen_op_movl_seg_T0_vm(s, R_CS);
>                  gen_op_jmp_v(cpu_T1);
>              }
>              tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
>              gen_jr(s, cpu_tmp4);
>              break;
>          case 6: /* push Ev */
> -            gen_push_v(s, cpu_T0);
> +            gen_push_v(s, s->T0);
>              break;
>          default:
>              goto unknown_op;
> @@ -5093,7 +5107,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>
>          gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>          gen_op_mov_v_reg(ot, cpu_T1, reg);
> -        gen_op_testl_T0_T1_cc();
> +        gen_op_testl_T0_T1_cc(s);
>          set_cc_op(s, CC_OP_LOGICB + ot);
>          break;
>
> @@ -5102,9 +5116,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          ot = mo_b_d(b, dflag);
>          val = insn_get(env, s, ot);
>
> -        gen_op_mov_v_reg(ot, cpu_T0, OR_EAX);
> +        gen_op_mov_v_reg(ot, s->T0, OR_EAX);
>          tcg_gen_movi_tl(cpu_T1, val);
> -        gen_op_testl_T0_T1_cc();
> +        gen_op_testl_T0_T1_cc(s);
>          set_cc_op(s, CC_OP_LOGICB + ot);
>          break;
>
> @@ -5112,20 +5126,20 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          switch (dflag) {
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
> -            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
> -            gen_op_mov_reg_v(MO_64, R_EAX, cpu_T0);
> +            gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
> +            tcg_gen_ext32s_tl(s->T0, s->T0);
> +            gen_op_mov_reg_v(MO_64, R_EAX, s->T0);
>              break;
>  #endif
>          case MO_32:
> -            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
> -            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
> -            gen_op_mov_reg_v(MO_32, R_EAX, cpu_T0);
> +            gen_op_mov_v_reg(MO_16, s->T0, R_EAX);
> +            tcg_gen_ext16s_tl(s->T0, s->T0);
> +            gen_op_mov_reg_v(MO_32, R_EAX, s->T0);
>              break;
>          case MO_16:
> -            gen_op_mov_v_reg(MO_8, cpu_T0, R_EAX);
> -            tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
> -            gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
> +            gen_op_mov_v_reg(MO_8, s->T0, R_EAX);
> +            tcg_gen_ext8s_tl(s->T0, s->T0);
> +            gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
>              break;
>          default:
>              tcg_abort();
> @@ -5135,22 +5149,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          switch (dflag) {
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            gen_op_mov_v_reg(MO_64, cpu_T0, R_EAX);
> -            tcg_gen_sari_tl(cpu_T0, cpu_T0, 63);
> -            gen_op_mov_reg_v(MO_64, R_EDX, cpu_T0);
> +            gen_op_mov_v_reg(MO_64, s->T0, R_EAX);
> +            tcg_gen_sari_tl(s->T0, s->T0, 63);
> +            gen_op_mov_reg_v(MO_64, R_EDX, s->T0);
>              break;
>  #endif
>          case MO_32:
> -            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
> -            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
> -            tcg_gen_sari_tl(cpu_T0, cpu_T0, 31);
> -            gen_op_mov_reg_v(MO_32, R_EDX, cpu_T0);
> +            gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
> +            tcg_gen_ext32s_tl(s->T0, s->T0);
> +            tcg_gen_sari_tl(s->T0, s->T0, 31);
> +            gen_op_mov_reg_v(MO_32, R_EDX, s->T0);
>              break;
>          case MO_16:
> -            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
> -            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
> -            tcg_gen_sari_tl(cpu_T0, cpu_T0, 15);
> -            gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
> +            gen_op_mov_v_reg(MO_16, s->T0, R_EAX);
> +            tcg_gen_ext16s_tl(s->T0, s->T0);
> +            tcg_gen_sari_tl(s->T0, s->T0, 15);
> +            gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
>              break;
>          default:
>              tcg_abort();
> @@ -5179,14 +5193,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          switch (ot) {
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, cpu_T0, cpu_T1);
> +            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, s->T0, cpu_T1);
>              tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
>              tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
>              tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T1);
>              break;
>  #endif
>          case MO_32:
> -            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>              tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
>              tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
>                                cpu_tmp2_i32, cpu_tmp3_i32);
> @@ -5197,14 +5211,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
>              break;
>          default:
> -            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
> +            tcg_gen_ext16s_tl(s->T0, s->T0);
>              tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
>              /* XXX: use 32 bit mul which could be faster */
> -            tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
> -            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> -            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
> -            tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
> -            gen_op_mov_reg_v(ot, reg, cpu_T0);
> +            tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
> +            tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
> +            tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
> +            gen_op_mov_reg_v(ot, reg, s->T0);
>              break;
>          }
>          set_cc_op(s, CC_OP_MULB + ot);
> @@ -5215,27 +5229,27 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          modrm = x86_ldub_code(env, s);
>          reg = ((modrm >> 3) & 7) | rex_r;
>          mod = (modrm >> 6) & 3;
> -        gen_op_mov_v_reg(ot, cpu_T0, reg);
> +        gen_op_mov_v_reg(ot, s->T0, reg);
>          if (mod == 3) {
>              rm = (modrm & 7) | REX_B(s);
>              gen_op_mov_v_reg(ot, cpu_T1, rm);
> -            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
> +            tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
>              gen_op_mov_reg_v(ot, reg, cpu_T1);
> -            gen_op_mov_reg_v(ot, rm, cpu_T0);
> +            gen_op_mov_reg_v(ot, rm, s->T0);
>          } else {
>              gen_lea_modrm(env, s, modrm);
>              if (s->prefix & PREFIX_LOCK) {
> -                tcg_gen_atomic_fetch_add_tl(cpu_T1, s->A0, cpu_T0,
> +                tcg_gen_atomic_fetch_add_tl(cpu_T1, s->A0, s->T0,
>                                              s->mem_index, ot | MO_LE);
> -                tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
> +                tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
>              } else {
>                  gen_op_ld_v(s, ot, cpu_T1, s->A0);
> -                tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
> -                gen_op_st_v(s, ot, cpu_T0, s->A0);
> +                tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
> +                gen_op_st_v(s, ot, s->T0, s->A0);
>              }
>              gen_op_mov_reg_v(ot, reg, cpu_T1);
>          }
> -        gen_op_update2_cc();
> +        gen_op_update2_cc(s);
>          set_cc_op(s, CC_OP_ADDB + ot);
>          break;
>      case 0x1b0:
> @@ -5328,14 +5342,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          /**************************/
>          /* push/pop */
>      case 0x50 ... 0x57: /* push */
> -        gen_op_mov_v_reg(MO_32, cpu_T0, (b & 7) | REX_B(s));
> -        gen_push_v(s, cpu_T0);
> +        gen_op_mov_v_reg(MO_32, s->T0, (b & 7) | REX_B(s));
> +        gen_push_v(s, s->T0);
>          break;
>      case 0x58 ... 0x5f: /* pop */
>          ot = gen_pop_T0(s);
>          /* NOTE: order is important for pop %sp */
>          gen_pop_update(s, ot);
> -        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), cpu_T0);
> +        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), s->T0);
>          break;
>      case 0x60: /* pusha */
>          if (CODE64(s))
> @@ -5354,8 +5368,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              val = insn_get(env, s, ot);
>          else
>              val = (int8_t)insn_get(env, s, MO_8);
> -        tcg_gen_movi_tl(cpu_T0, val);
> -        gen_push_v(s, cpu_T0);
> +        tcg_gen_movi_tl(s->T0, val);
> +        gen_push_v(s, s->T0);
>          break;
>      case 0x8f: /* pop Ev */
>          modrm = x86_ldub_code(env, s);
> @@ -5365,7 +5379,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              /* NOTE: order is important for pop %sp */
>              gen_pop_update(s, ot);
>              rm = (modrm & 7) | REX_B(s);
> -            gen_op_mov_reg_v(ot, rm, cpu_T0);
> +            gen_op_mov_reg_v(ot, rm, s->T0);
>          } else {
>              /* NOTE: order is important too for MMU exceptions */
>              s->popl_esp_hack = 1 << ot;
> @@ -5391,13 +5405,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>      case 0x1e: /* push ds */
>          if (CODE64(s))
>              goto illegal_op;
> -        gen_op_movl_T0_seg(b >> 3);
> -        gen_push_v(s, cpu_T0);
> +        gen_op_movl_T0_seg(s, b >> 3);
> +        gen_push_v(s, s->T0);
>          break;
>      case 0x1a0: /* push fs */
>      case 0x1a8: /* push gs */
> -        gen_op_movl_T0_seg((b >> 3) & 7);
> -        gen_push_v(s, cpu_T0);
> +        gen_op_movl_T0_seg(s, (b >> 3) & 7);
> +        gen_push_v(s, s->T0);
>          break;
>      case 0x07: /* pop es */
>      case 0x17: /* pop ss */
> @@ -5451,11 +5465,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_lea_modrm(env, s, modrm);
>          }
>          val = insn_get(env, s, ot);
> -        tcg_gen_movi_tl(cpu_T0, val);
> +        tcg_gen_movi_tl(s->T0, val);
>          if (mod != 3) {
> -            gen_op_st_v(s, ot, cpu_T0, s->A0);
> +            gen_op_st_v(s, ot, s->T0, s->A0);
>          } else {
> -            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T0);
> +            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), s->T0);
>          }
>          break;
>      case 0x8a:
> @@ -5465,7 +5479,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          reg = ((modrm >> 3) & 7) | rex_r;
>
>          gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> -        gen_op_mov_reg_v(ot, reg, cpu_T0);
> +        gen_op_mov_reg_v(ot, reg, s->T0);
>          break;
>      case 0x8e: /* mov seg, Gv */
>          modrm = x86_ldub_code(env, s);
> @@ -5491,7 +5505,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          mod = (modrm >> 6) & 3;
>          if (reg >= 6)
>              goto illegal_op;
> -        gen_op_movl_T0_seg(reg);
> +        gen_op_movl_T0_seg(s, reg);
>          ot = mod == 3 ? dflag : MO_16;
>          gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
>          break;
> @@ -5518,30 +5532,30 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>
>              if (mod == 3) {
>                  if (s_ot == MO_SB && byte_reg_is_xH(rm)) {
> -                    tcg_gen_sextract_tl(cpu_T0, cpu_regs[rm - 4], 8, 8);
> +                    tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
>                  } else {
> -                    gen_op_mov_v_reg(ot, cpu_T0, rm);
> +                    gen_op_mov_v_reg(ot, s->T0, rm);
>                      switch (s_ot) {
>                      case MO_UB:
> -                        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
> +                        tcg_gen_ext8u_tl(s->T0, s->T0);
>                          break;
>                      case MO_SB:
> -                        tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
> +                        tcg_gen_ext8s_tl(s->T0, s->T0);
>                          break;
>                      case MO_UW:
> -                        tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
> +                        tcg_gen_ext16u_tl(s->T0, s->T0);
>                          break;
>                      default:
>                      case MO_SW:
> -                        tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
> +                        tcg_gen_ext16s_tl(s->T0, s->T0);
>                          break;
>                      }
>                  }
> -                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
> +                gen_op_mov_reg_v(d_ot, reg, s->T0);
>              } else {
>                  gen_lea_modrm(env, s, modrm);
> -                gen_op_ld_v(s, s_ot, cpu_T0, s->A0);
> -                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
> +                gen_op_ld_v(s, s_ot, s->T0, s->A0);
> +                gen_op_mov_reg_v(d_ot, reg, s->T0);
>              }
>          }
>          break;
> @@ -5581,27 +5595,27 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              tcg_gen_movi_tl(s->A0, offset_addr);
>              gen_add_A0_ds_seg(s);
>              if ((b & 2) == 0) {
> -                gen_op_ld_v(s, ot, cpu_T0, s->A0);
> -                gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
> +                gen_op_ld_v(s, ot, s->T0, s->A0);
> +                gen_op_mov_reg_v(ot, R_EAX, s->T0);
>              } else {
> -                gen_op_mov_v_reg(ot, cpu_T0, R_EAX);
> -                gen_op_st_v(s, ot, cpu_T0, s->A0);
> +                gen_op_mov_v_reg(ot, s->T0, R_EAX);
> +                gen_op_st_v(s, ot, s->T0, s->A0);
>              }
>          }
>          break;
>      case 0xd7: /* xlat */
>          tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
> -        tcg_gen_ext8u_tl(cpu_T0, cpu_regs[R_EAX]);
> -        tcg_gen_add_tl(s->A0, s->A0, cpu_T0);
> +        tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
> +        tcg_gen_add_tl(s->A0, s->A0, s->T0);
>          gen_extu(s->aflag, s->A0);
>          gen_add_A0_ds_seg(s);
> -        gen_op_ld_v(s, MO_8, cpu_T0, s->A0);
> -        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
> +        gen_op_ld_v(s, MO_8, s->T0, s->A0);
> +        gen_op_mov_reg_v(MO_8, R_EAX, s->T0);
>          break;
>      case 0xb0 ... 0xb7: /* mov R, Ib */
>          val = insn_get(env, s, MO_8);
> -        tcg_gen_movi_tl(cpu_T0, val);
> -        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), cpu_T0);
> +        tcg_gen_movi_tl(s->T0, val);
> +        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), s->T0);
>          break;
>      case 0xb8 ... 0xbf: /* mov R, Iv */
>  #ifdef TARGET_X86_64
> @@ -5610,16 +5624,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              /* 64 bit case */
>              tmp = x86_ldq_code(env, s);
>              reg = (b & 7) | REX_B(s);
> -            tcg_gen_movi_tl(cpu_T0, tmp);
> -            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
> +            tcg_gen_movi_tl(s->T0, tmp);
> +            gen_op_mov_reg_v(MO_64, reg, s->T0);
>          } else
>  #endif
>          {
>              ot = dflag;
>              val = insn_get(env, s, ot);
>              reg = (b & 7) | REX_B(s);
> -            tcg_gen_movi_tl(cpu_T0, val);
> -            gen_op_mov_reg_v(ot, reg, cpu_T0);
> +            tcg_gen_movi_tl(s->T0, val);
> +            gen_op_mov_reg_v(ot, reg, s->T0);
>          }
>          break;
>
> @@ -5638,15 +5652,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          if (mod == 3) {
>              rm = (modrm & 7) | REX_B(s);
>          do_xchg_reg:
> -            gen_op_mov_v_reg(ot, cpu_T0, reg);
> +            gen_op_mov_v_reg(ot, s->T0, reg);
>              gen_op_mov_v_reg(ot, cpu_T1, rm);
> -            gen_op_mov_reg_v(ot, rm, cpu_T0);
> +            gen_op_mov_reg_v(ot, rm, s->T0);
>              gen_op_mov_reg_v(ot, reg, cpu_T1);
>          } else {
>              gen_lea_modrm(env, s, modrm);
> -            gen_op_mov_v_reg(ot, cpu_T0, reg);
> +            gen_op_mov_v_reg(ot, s->T0, reg);
>              /* for xchg, lock is implicit */
> -            tcg_gen_atomic_xchg_tl(cpu_T1, s->A0, cpu_T0,
> +            tcg_gen_atomic_xchg_tl(cpu_T1, s->A0, s->T0,
>                                     s->mem_index, ot | MO_LE);
>              gen_op_mov_reg_v(ot, reg, cpu_T1);
>          }
> @@ -5678,7 +5692,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          gen_op_ld_v(s, ot, cpu_T1, s->A0);
>          gen_add_A0_im(s, 1 << ot);
>          /* load the segment first to handle exceptions properly */
> -        gen_op_ld_v(s, MO_16, cpu_T0, s->A0);
> +        gen_op_ld_v(s, MO_16, s->T0, s->A0);
>          gen_movl_seg_T0(s, op);
>          /* then put the data */
>          gen_op_mov_reg_v(ot, reg, cpu_T1);
> @@ -6220,8 +6234,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  switch(rm) {
>                  case 0:
>                      gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
> -                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
> -                    gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
> +                    tcg_gen_extu_i32_tl(s->T0, cpu_tmp2_i32);
> +                    gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
>                      break;
>                  default:
>                      goto unknown_op;
> @@ -6331,7 +6345,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>      case 0x6c: /* insS */
>      case 0x6d:
>          ot = mo_b_d32(b, dflag);
> -        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
> +        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
>          if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
> @@ -6346,7 +6360,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>      case 0x6e: /* outsS */
>      case 0x6f:
>          ot = mo_b_d32(b, dflag);
> -        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
> +        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       svm_is_rep(prefixes) | 4);
>          if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
> @@ -6366,7 +6380,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>      case 0xe5:
>          ot = mo_b_d32(b, dflag);
>          val = x86_ldub_code(env, s);
> -        tcg_gen_movi_tl(cpu_T0, val);
> +        tcg_gen_movi_tl(s->T0, val);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
> @@ -6385,7 +6399,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>      case 0xe7:
>          ot = mo_b_d32(b, dflag);
>          val = x86_ldub_code(env, s);
> -        tcg_gen_movi_tl(cpu_T0, val);
> +        tcg_gen_movi_tl(s->T0, val);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       svm_is_rep(prefixes));
>          gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
> @@ -6405,13 +6419,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>      case 0xec:
>      case 0xed:
>          ot = mo_b_d32(b, dflag);
> -        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
> +        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_start();
>  	}
> -        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>          gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
>          gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
>          gen_bpt_io(s, cpu_tmp2_i32, ot);
> @@ -6423,7 +6437,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>      case 0xee:
>      case 0xef:
>          ot = mo_b_d32(b, dflag);
> -        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
> +        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       svm_is_rep(prefixes));
>          gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
> @@ -6431,7 +6445,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_start();
>  	}
> -        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>          tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
>          gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
>          gen_bpt_io(s, cpu_tmp2_i32, ot);
> @@ -6448,17 +6462,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          ot = gen_pop_T0(s);
>          gen_stack_update(s, val + (1 << ot));
>          /* Note that gen_pop_T0 uses a zero-extending load.  */
> -        gen_op_jmp_v(cpu_T0);
> +        gen_op_jmp_v(s->T0);
>          gen_bnd_jmp(s);
> -        gen_jr(s, cpu_T0);
> +        gen_jr(s, s->T0);
>          break;
>      case 0xc3: /* ret */
>          ot = gen_pop_T0(s);
>          gen_pop_update(s, ot);
>          /* Note that gen_pop_T0 uses a zero-extending load.  */
> -        gen_op_jmp_v(cpu_T0);
> +        gen_op_jmp_v(s->T0);
>          gen_bnd_jmp(s);
> -        gen_jr(s, cpu_T0);
> +        gen_jr(s, s->T0);
>          break;
>      case 0xca: /* lret im */
>          val = x86_ldsw_code(env, s);
> @@ -6471,14 +6485,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          } else {
>              gen_stack_A0(s);
>              /* pop offset */
> -            gen_op_ld_v(s, dflag, cpu_T0, s->A0);
> +            gen_op_ld_v(s, dflag, s->T0, s->A0);
>              /* NOTE: keeping EIP updated is not a problem in case of
>                 exception */
> -            gen_op_jmp_v(cpu_T0);
> +            gen_op_jmp_v(s->T0);
>              /* pop selector */
>              gen_add_A0_im(s, 1 << dflag);
> -            gen_op_ld_v(s, dflag, cpu_T0, s->A0);
> -            gen_op_movl_seg_T0_vm(R_CS);
> +            gen_op_ld_v(s, dflag, s->T0, s->A0);
> +            gen_op_movl_seg_T0_vm(s, R_CS);
>              /* add stack offset */
>              gen_stack_update(s, val + (2 << dflag));
>          }
> @@ -6521,8 +6535,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              } else if (!CODE64(s)) {
>                  tval &= 0xffffffff;
>              }
> -            tcg_gen_movi_tl(cpu_T0, next_eip);
> -            gen_push_v(s, cpu_T0);
> +            tcg_gen_movi_tl(s->T0, next_eip);
> +            gen_push_v(s, s->T0);
>              gen_bnd_jmp(s);
>              gen_jmp(s, tval);
>          }
> @@ -6537,7 +6551,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              offset = insn_get(env, s, ot);
>              selector = insn_get(env, s, MO_16);
>
> -            tcg_gen_movi_tl(cpu_T0, selector);
> +            tcg_gen_movi_tl(s->T0, selector);
>              tcg_gen_movi_tl(cpu_T1, offset);
>          }
>          goto do_lcall;
> @@ -6566,7 +6580,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              offset = insn_get(env, s, ot);
>              selector = insn_get(env, s, MO_16);
>
> -            tcg_gen_movi_tl(cpu_T0, selector);
> +            tcg_gen_movi_tl(s->T0, selector);
>              tcg_gen_movi_tl(cpu_T1, offset);
>          }
>          goto do_ljmp;
> @@ -6599,7 +6613,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>
>      case 0x190 ... 0x19f: /* setcc Gv */
>          modrm = x86_ldub_code(env, s);
> -        gen_setcc1(s, b, cpu_T0);
> +        gen_setcc1(s, b, s->T0);
>          gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
>          break;
>      case 0x140 ... 0x14f: /* cmov Gv, Ev */
> @@ -6620,8 +6634,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>          } else {
>              gen_update_cc_op(s);
> -            gen_helper_read_eflags(cpu_T0, cpu_env);
> -            gen_push_v(s, cpu_T0);
> +            gen_helper_read_eflags(s->T0, cpu_env);
> +            gen_push_v(s, s->T0);
>          }
>          break;
>      case 0x9d: /* popf */
> @@ -6632,13 +6646,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              ot = gen_pop_T0(s);
>              if (s->cpl == 0) {
>                  if (dflag != MO_16) {
> -                    gen_helper_write_eflags(cpu_env, cpu_T0,
> +                    gen_helper_write_eflags(cpu_env, s->T0,
>                                              tcg_const_i32((TF_MASK | AC_MASK |
>                                                             ID_MASK | NT_MASK |
>                                                             IF_MASK |
>                                                             IOPL_MASK)));
>                  } else {
> -                    gen_helper_write_eflags(cpu_env, cpu_T0,
> +                    gen_helper_write_eflags(cpu_env, s->T0,
>                                              tcg_const_i32((TF_MASK | AC_MASK |
>                                                             ID_MASK | NT_MASK |
>                                                             IF_MASK | IOPL_MASK)
> @@ -6647,14 +6661,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              } else {
>                  if (s->cpl <= s->iopl) {
>                      if (dflag != MO_16) {
> -                        gen_helper_write_eflags(cpu_env, cpu_T0,
> +                        gen_helper_write_eflags(cpu_env, s->T0,
>                                                  tcg_const_i32((TF_MASK |
>                                                                 AC_MASK |
>                                                                 ID_MASK |
>                                                                 NT_MASK |
>                                                                 IF_MASK)));
>                      } else {
> -                        gen_helper_write_eflags(cpu_env, cpu_T0,
> +                        gen_helper_write_eflags(cpu_env, s->T0,
>                                                  tcg_const_i32((TF_MASK |
>                                                                 AC_MASK |
>                                                                 ID_MASK |
> @@ -6664,11 +6678,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      }
>                  } else {
>                      if (dflag != MO_16) {
> -                        gen_helper_write_eflags(cpu_env, cpu_T0,
> +                        gen_helper_write_eflags(cpu_env, s->T0,
>                                             tcg_const_i32((TF_MASK | AC_MASK |
>                                                            ID_MASK | NT_MASK)));
>                      } else {
> -                        gen_helper_write_eflags(cpu_env, cpu_T0,
> +                        gen_helper_write_eflags(cpu_env, s->T0,
>                                             tcg_const_i32((TF_MASK | AC_MASK |
>                                                            ID_MASK | NT_MASK)
>                                                           & 0xffff));
> @@ -6685,19 +6699,19 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>      case 0x9e: /* sahf */
>          if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
>              goto illegal_op;
> -        gen_op_mov_v_reg(MO_8, cpu_T0, R_AH);
> +        gen_op_mov_v_reg(MO_8, s->T0, R_AH);
>          gen_compute_eflags(s);
>          tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
> -        tcg_gen_andi_tl(cpu_T0, cpu_T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
> -        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T0);
> +        tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
> +        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
>          break;
>      case 0x9f: /* lahf */
>          if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
>              goto illegal_op;
>          gen_compute_eflags(s);
>          /* Note: gen_compute_eflags() only gives the condition codes */
> -        tcg_gen_ori_tl(cpu_T0, cpu_cc_src, 0x02);
> -        gen_op_mov_reg_v(MO_8, R_AH, cpu_T0);
> +        tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
> +        gen_op_mov_reg_v(MO_8, R_AH, s->T0);
>          break;
>      case 0xf5: /* cmc */
>          gen_compute_eflags(s);
> @@ -6732,10 +6746,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              s->rip_offset = 1;
>              gen_lea_modrm(env, s, modrm);
>              if (!(s->prefix & PREFIX_LOCK)) {
> -                gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +                gen_op_ld_v(s, ot, s->T0, s->A0);
>              }
>          } else {
> -            gen_op_mov_v_reg(ot, cpu_T0, rm);
> +            gen_op_mov_v_reg(ot, s->T0, rm);
>          }
>          /* load shift */
>          val = x86_ldub_code(env, s);
> @@ -6771,10 +6785,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), cpu_tmp0);
>              gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
>              if (!(s->prefix & PREFIX_LOCK)) {
> -                gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +                gen_op_ld_v(s, ot, s->T0, s->A0);
>              }
>          } else {
> -            gen_op_mov_v_reg(ot, cpu_T0, rm);
> +            gen_op_mov_v_reg(ot, s->T0, rm);
>          }
>      bt_op:
>          tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
> @@ -6785,46 +6799,46 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              case 0: /* bt */
>                  /* Needs no atomic ops; we surpressed the normal
>                     memory load for LOCK above so do it now.  */
> -                gen_op_ld_v(s, ot, cpu_T0, s->A0);
> +                gen_op_ld_v(s, ot, s->T0, s->A0);
>                  break;
>              case 1: /* bts */
> -                tcg_gen_atomic_fetch_or_tl(cpu_T0, s->A0, cpu_tmp0,
> +                tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, cpu_tmp0,
>                                             s->mem_index, ot | MO_LE);
>                  break;
>              case 2: /* btr */
>                  tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
> -                tcg_gen_atomic_fetch_and_tl(cpu_T0, s->A0, cpu_tmp0,
> +                tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, cpu_tmp0,
>                                              s->mem_index, ot | MO_LE);
>                  break;
>              default:
>              case 3: /* btc */
> -                tcg_gen_atomic_fetch_xor_tl(cpu_T0, s->A0, cpu_tmp0,
> +                tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, cpu_tmp0,
>                                              s->mem_index, ot | MO_LE);
>                  break;
>              }
> -            tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
> +            tcg_gen_shr_tl(cpu_tmp4, s->T0, cpu_T1);
>          } else {
> -            tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
> +            tcg_gen_shr_tl(cpu_tmp4, s->T0, cpu_T1);
>              switch (op) {
>              case 0: /* bt */
>                  /* Data already loaded; nothing to do.  */
>                  break;
>              case 1: /* bts */
> -                tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
> +                tcg_gen_or_tl(s->T0, s->T0, cpu_tmp0);
>                  break;
>              case 2: /* btr */
> -                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0);
> +                tcg_gen_andc_tl(s->T0, s->T0, cpu_tmp0);
>                  break;
>              default:
>              case 3: /* btc */
> -                tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0);
> +                tcg_gen_xor_tl(s->T0, s->T0, cpu_tmp0);
>                  break;
>              }
>              if (op != 0) {
>                  if (mod != 3) {
> -                    gen_op_st_v(s, ot, cpu_T0, s->A0);
> +                    gen_op_st_v(s, ot, s->T0, s->A0);
>                  } else {
> -                    gen_op_mov_reg_v(ot, rm, cpu_T0);
> +                    gen_op_mov_reg_v(ot, rm, s->T0);
>                  }
>              }
>          }
> @@ -6865,7 +6879,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          modrm = x86_ldub_code(env, s);
>          reg = ((modrm >> 3) & 7) | rex_r;
>          gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> -        gen_extu(ot, cpu_T0);
> +        gen_extu(ot, s->T0);
>
>          /* Note that lzcnt and tzcnt are in different extensions.  */
>          if ((prefixes & PREFIX_REPZ)
> @@ -6874,23 +6888,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
>              int size = 8 << ot;
>              /* For lzcnt/tzcnt, C bit is defined related to the input. */
> -            tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
> +            tcg_gen_mov_tl(cpu_cc_src, s->T0);
>              if (b & 1) {
>                  /* For lzcnt, reduce the target_ulong result by the
>                     number of zeros that we expect to find at the top.  */
> -                tcg_gen_clzi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS);
> -                tcg_gen_subi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - size);
> +                tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
> +                tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
>              } else {
>                  /* For tzcnt, a zero input must return the operand size.  */
> -                tcg_gen_ctzi_tl(cpu_T0, cpu_T0, size);
> +                tcg_gen_ctzi_tl(s->T0, s->T0, size);
>              }
>              /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
> -            gen_op_update1_cc();
> +            gen_op_update1_cc(s);
>              set_cc_op(s, CC_OP_BMILGB + ot);
>          } else {
>              /* For bsr/bsf, only the Z bit is defined and it is related
>                 to the input and not the result.  */
> -            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
> +            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>              set_cc_op(s, CC_OP_LOGICB + ot);
>
>              /* ??? The manual says that the output is undefined when the
> @@ -6901,13 +6915,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  /* For bsr, return the bit index of the first 1 bit,
>                     not the count of leading zeros.  */
>                  tcg_gen_xori_tl(cpu_T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
> -                tcg_gen_clz_tl(cpu_T0, cpu_T0, cpu_T1);
> -                tcg_gen_xori_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - 1);
> +                tcg_gen_clz_tl(s->T0, s->T0, cpu_T1);
> +                tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
>              } else {
> -                tcg_gen_ctz_tl(cpu_T0, cpu_T0, cpu_regs[reg]);
> +                tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
>              }
>          }
> -        gen_op_mov_reg_v(ot, reg, cpu_T0);
> +        gen_op_mov_reg_v(ot, reg, s->T0);
>          break;
>          /************************/
>          /* bcd */
> @@ -7047,9 +7061,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          mod = (modrm >> 6) & 3;
>          if (mod == 3)
>              goto illegal_op;
> -        gen_op_mov_v_reg(ot, cpu_T0, reg);
> +        gen_op_mov_v_reg(ot, s->T0, reg);
>          gen_lea_modrm(env, s, modrm);
> -        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>          if (ot == MO_16) {
>              gen_helper_boundw(cpu_env, s->A0, cpu_tmp2_i32);
>          } else {
> @@ -7060,24 +7074,24 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          reg = (b & 7) | REX_B(s);
>  #ifdef TARGET_X86_64
>          if (dflag == MO_64) {
> -            gen_op_mov_v_reg(MO_64, cpu_T0, reg);
> -            tcg_gen_bswap64_i64(cpu_T0, cpu_T0);
> -            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
> +            gen_op_mov_v_reg(MO_64, s->T0, reg);
> +            tcg_gen_bswap64_i64(s->T0, s->T0);
> +            gen_op_mov_reg_v(MO_64, reg, s->T0);
>          } else
>  #endif
>          {
> -            gen_op_mov_v_reg(MO_32, cpu_T0, reg);
> -            tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
> -            tcg_gen_bswap32_tl(cpu_T0, cpu_T0);
> -            gen_op_mov_reg_v(MO_32, reg, cpu_T0);
> +            gen_op_mov_v_reg(MO_32, s->T0, reg);
> +            tcg_gen_ext32u_tl(s->T0, s->T0);
> +            tcg_gen_bswap32_tl(s->T0, s->T0);
> +            gen_op_mov_reg_v(MO_32, reg, s->T0);
>          }
>          break;
>      case 0xd6: /* salc */
>          if (CODE64(s))
>              goto illegal_op;
> -        gen_compute_eflags_c(s, cpu_T0);
> -        tcg_gen_neg_tl(cpu_T0, cpu_T0);
> -        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
> +        gen_compute_eflags_c(s, s->T0);
> +        tcg_gen_neg_tl(s->T0, s->T0);
> +        gen_op_mov_reg_v(MO_8, R_EAX, s->T0);
>          break;
>      case 0xe0: /* loopnz */
>      case 0xe1: /* loopz */
> @@ -7229,7 +7243,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              if (!s->pe || s->vm86)
>                  goto illegal_op;
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
> -            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
> +            tcg_gen_ld32u_tl(s->T0, cpu_env,
>                               offsetof(CPUX86State, ldt.selector));
>              ot = mod == 3 ? dflag : MO_16;
>              gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> @@ -7242,7 +7256,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              } else {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
>                  gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  gen_helper_lldt(cpu_env, cpu_tmp2_i32);
>              }
>              break;
> @@ -7250,7 +7264,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              if (!s->pe || s->vm86)
>                  goto illegal_op;
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
> -            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
> +            tcg_gen_ld32u_tl(s->T0, cpu_env,
>                               offsetof(CPUX86State, tr.selector));
>              ot = mod == 3 ? dflag : MO_16;
>              gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> @@ -7263,7 +7277,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              } else {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
>                  gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> -                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
> +                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
>                  gen_helper_ltr(cpu_env, cpu_tmp2_i32);
>              }
>              break;
> @@ -7274,9 +7288,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
>              gen_update_cc_op(s);
>              if (op == 4) {
> -                gen_helper_verr(cpu_env, cpu_T0);
> +                gen_helper_verr(cpu_env, s->T0);
>              } else {
> -                gen_helper_verw(cpu_env, cpu_T0);
> +                gen_helper_verw(cpu_env, s->T0);
>              }
>              set_cc_op(s, CC_OP_EFLAGS);
>              break;
> @@ -7291,15 +7305,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          CASE_MODRM_MEM_OP(0): /* sgdt */
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
>              gen_lea_modrm(env, s, modrm);
> -            tcg_gen_ld32u_tl(cpu_T0,
> +            tcg_gen_ld32u_tl(s->T0,
>                               cpu_env, offsetof(CPUX86State, gdt.limit));
> -            gen_op_st_v(s, MO_16, cpu_T0, s->A0);
> +            gen_op_st_v(s, MO_16, s->T0, s->A0);
>              gen_add_A0_im(s, 2);
> -            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
> +            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
>              if (dflag == MO_16) {
> -                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
> +                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
>              }
> -            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
> +            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
>              break;
>
>          case 0xc8: /* monitor */
> @@ -7347,14 +7361,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          CASE_MODRM_MEM_OP(1): /* sidt */
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
>              gen_lea_modrm(env, s, modrm);
> -            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit));
> -            gen_op_st_v(s, MO_16, cpu_T0, s->A0);
> +            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
> +            gen_op_st_v(s, MO_16, s->T0, s->A0);
>              gen_add_A0_im(s, 2);
> -            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
> +            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
>              if (dflag == MO_16) {
> -                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
> +                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
>              }
> -            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
> +            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
>              break;
>
>          case 0xd0: /* xgetbv */
> @@ -7500,11 +7514,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_lea_modrm(env, s, modrm);
>              gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
>              gen_add_A0_im(s, 2);
> -            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
> +            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
>              if (dflag == MO_16) {
> -                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
> +                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
>              }
> -            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
> +            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
>              tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit));
>              break;
>
> @@ -7517,17 +7531,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_lea_modrm(env, s, modrm);
>              gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
>              gen_add_A0_im(s, 2);
> -            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, s->A0);
> +            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
>              if (dflag == MO_16) {
> -                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
> +                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
>              }
> -            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
> +            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
>              tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit));
>              break;
>
>          CASE_MODRM_OP(4): /* smsw */
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
> -            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]));
> +            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
>              if (CODE64(s)) {
>                  mod = (modrm >> 6) & 3;
>                  ot = (mod != 3 ? MO_16 : s->dflag);
> @@ -7560,7 +7574,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
>              gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> -            gen_helper_lmsw(cpu_env, cpu_T0);
> +            gen_helper_lmsw(cpu_env, s->T0);
>              gen_jmp_im(s->pc - s->cs_base);
>              gen_eob(s);
>              break;
> @@ -7584,10 +7598,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  if (s->cpl != 0) {
>                      gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>                  } else {
> -                    tcg_gen_mov_tl(cpu_T0, cpu_seg_base[R_GS]);
> +                    tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
>                      tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
>                                    offsetof(CPUX86State, kernelgsbase));
> -                    tcg_gen_st_tl(cpu_T0, cpu_env,
> +                    tcg_gen_st_tl(s->T0, cpu_env,
>                                    offsetof(CPUX86State, kernelgsbase));
>                  }
>                  break;
> @@ -7638,16 +7652,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              rm = (modrm & 7) | REX_B(s);
>
>              if (mod == 3) {
> -                gen_op_mov_v_reg(MO_32, cpu_T0, rm);
> +                gen_op_mov_v_reg(MO_32, s->T0, rm);
>                  /* sign extend */
>                  if (d_ot == MO_64) {
> -                    tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
> +                    tcg_gen_ext32s_tl(s->T0, s->T0);
>                  }
> -                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
> +                gen_op_mov_reg_v(d_ot, reg, s->T0);
>              } else {
>                  gen_lea_modrm(env, s, modrm);
> -                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, s->A0);
> -                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
> +                gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
> +                gen_op_mov_reg_v(d_ot, reg, s->T0);
>              }
>          } else
>  #endif
> @@ -7712,9 +7726,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              t0 = tcg_temp_local_new();
>              gen_update_cc_op(s);
>              if (b == 0x102) {
> -                gen_helper_lar(t0, cpu_env, cpu_T0);
> +                gen_helper_lar(t0, cpu_env, s->T0);
>              } else {
> -                gen_helper_lsl(t0, cpu_env, cpu_T0);
> +                gen_helper_lsl(t0, cpu_env, s->T0);
>              }
>              tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
>              label1 = gen_new_label();
> @@ -7816,16 +7830,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  }
>                  gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
>                  if (a.index >= 0) {
> -                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
> +                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
>                  } else {
> -                    tcg_gen_movi_tl(cpu_T0, 0);
> +                    tcg_gen_movi_tl(s->T0, 0);
>                  }
>                  if (CODE64(s)) {
> -                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, cpu_T0);
> +                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
>                      tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
>                                     offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
>                  } else {
> -                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, cpu_T0);
> +                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
>                      tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
>                      tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
>                  }
> @@ -7921,15 +7935,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  }
>                  gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
>                  if (a.index >= 0) {
> -                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
> +                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
>                  } else {
> -                    tcg_gen_movi_tl(cpu_T0, 0);
> +                    tcg_gen_movi_tl(s->T0, 0);
>                  }
>                  if (CODE64(s)) {
> -                    gen_helper_bndstx64(cpu_env, s->A0, cpu_T0,
> +                    gen_helper_bndstx64(cpu_env, s->A0, s->T0,
>                                          cpu_bndl[reg], cpu_bndu[reg]);
>                  } else {
> -                    gen_helper_bndstx32(cpu_env, s->A0, cpu_T0,
> +                    gen_helper_bndstx32(cpu_env, s->A0, s->T0,
>                                          cpu_bndl[reg], cpu_bndu[reg]);
>                  }
>              }
> @@ -7973,9 +7987,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>                          gen_io_start();
>                      }
> -                    gen_op_mov_v_reg(ot, cpu_T0, rm);
> +                    gen_op_mov_v_reg(ot, s->T0, rm);
>                      gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
> -                                         cpu_T0);
> +                                         s->T0);
>                      if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>                          gen_io_end();
>                      }
> @@ -7985,8 +7999,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>                          gen_io_start();
>                      }
> -                    gen_helper_read_crN(cpu_T0, cpu_env, tcg_const_i32(reg));
> -                    gen_op_mov_reg_v(ot, rm, cpu_T0);
> +                    gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg));
> +                    gen_op_mov_reg_v(ot, rm, s->T0);
>                      if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>                          gen_io_end();
>                      }
> @@ -8019,16 +8033,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              if (b & 2) {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
> -                gen_op_mov_v_reg(ot, cpu_T0, rm);
> +                gen_op_mov_v_reg(ot, s->T0, rm);
>                  tcg_gen_movi_i32(cpu_tmp2_i32, reg);
> -                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, cpu_T0);
> +                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, s->T0);
>                  gen_jmp_im(s->pc - s->cs_base);
>                  gen_eob(s);
>              } else {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
>                  tcg_gen_movi_i32(cpu_tmp2_i32, reg);
> -                gen_helper_get_dr(cpu_T0, cpu_env, cpu_tmp2_i32);
> -                gen_op_mov_reg_v(ot, rm, cpu_T0);
> +                gen_helper_get_dr(s->T0, cpu_env, cpu_tmp2_i32);
> +                gen_op_mov_reg_v(ot, rm, s->T0);
>              }
>          }
>          break;
> @@ -8107,8 +8121,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  break;
>              }
>              gen_lea_modrm(env, s, modrm);
> -            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, mxcsr));
> -            gen_op_st_v(s, MO_32, cpu_T0, s->A0);
> +            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
> +            gen_op_st_v(s, MO_32, s->T0, s->A0);
>              break;
>
>          CASE_MODRM_MEM_OP(4): /* xsave */
> @@ -8287,10 +8301,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          }
>
>          gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> -        gen_extu(ot, cpu_T0);
> -        tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
> -        tcg_gen_ctpop_tl(cpu_T0, cpu_T0);
> -        gen_op_mov_reg_v(ot, reg, cpu_T0);
> +        gen_extu(ot, s->T0);
> +        tcg_gen_mov_tl(cpu_cc_src, s->T0);
> +        tcg_gen_ctpop_tl(s->T0, s->T0);
> +        gen_op_mov_reg_v(ot, reg, s->T0);
>
>          set_cc_op(s, CC_OP_POPCNT);
>          break;
> @@ -8456,7 +8470,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
>          printf("ERROR addseg\n");
>  #endif
>
> -    cpu_T0 = tcg_temp_new();
> +    dc->T0 = tcg_temp_new();
>      cpu_T1 = tcg_temp_new();
>      dc->A0 = tcg_temp_new();


--
Alex Bennée

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 04/13] target/i386: move cpu_T1 to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 04/13] target/i386: move cpu_T1 " Emilio G. Cota
  2018-09-11 20:48   ` Richard Henderson
@ 2018-09-13 14:26   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Alex Bennée @ 2018-09-13 14:26 UTC (permalink / raw)
  To: Emilio G. Cota
  Cc: qemu-devel, Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost


Emilio G. Cota <cota@braap.org> writes:

> Signed-off-by: Emilio G. Cota <cota@braap.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  target/i386/translate.c | 341 ++++++++++++++++++++--------------------
>  1 file changed, 170 insertions(+), 171 deletions(-)
>
> diff --git a/target/i386/translate.c b/target/i386/translate.c
> index 73fd7e5b9a..bd27e65344 100644
> --- a/target/i386/translate.c
> +++ b/target/i386/translate.c
> @@ -78,8 +78,6 @@ static TCGv cpu_regs[CPU_NB_REGS];
>  static TCGv cpu_seg_base[6];
>  static TCGv_i64 cpu_bndl[4];
>  static TCGv_i64 cpu_bndu[4];
> -/* local temps */
> -static TCGv cpu_T1;
>  /* local register indexes (only used inside old micro ops) */
>  static TCGv cpu_tmp0, cpu_tmp4;
>  static TCGv_ptr cpu_ptr0, cpu_ptr1;
> @@ -139,6 +137,7 @@ typedef struct DisasContext {
>      TCGv cc_srcT;
>      TCGv A0;
>      TCGv T0;
> +    TCGv T1;
>
>      sigjmp_buf jmpbuf;
>  } DisasContext;
> @@ -656,20 +655,20 @@ static void gen_op_update1_cc(DisasContext *s)
>
>  static void gen_op_update2_cc(DisasContext *s)
>  {
> -    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
> +    tcg_gen_mov_tl(cpu_cc_src, s->T1);
>      tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>  }
>
>  static void gen_op_update3_cc(DisasContext *s, TCGv reg)
>  {
>      tcg_gen_mov_tl(cpu_cc_src2, reg);
> -    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
> +    tcg_gen_mov_tl(cpu_cc_src, s->T1);
>      tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>  }
>
>  static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
>  {
> -    tcg_gen_and_tl(cpu_cc_dst, s->T0, cpu_T1);
> +    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
>  }
>
>  static void gen_op_update_neg_cc(DisasContext *s)
> @@ -1090,7 +1089,7 @@ static inline void gen_lods(DisasContext *s, TCGMemOp ot)
>  static inline void gen_scas(DisasContext *s, TCGMemOp ot)
>  {
>      gen_string_movl_A0_EDI(s);
> -    gen_op_ld_v(s, ot, cpu_T1, s->A0);
> +    gen_op_ld_v(s, ot, s->T1, s->A0);
>      gen_op(s, OP_CMPL, ot, R_EAX);
>      gen_op_movl_T0_Dshift(s, ot);
>      gen_op_add_reg_T0(s, s->aflag, R_EDI);
> @@ -1099,7 +1098,7 @@ static inline void gen_scas(DisasContext *s, TCGMemOp ot)
>  static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
>  {
>      gen_string_movl_A0_EDI(s);
> -    gen_op_ld_v(s, ot, cpu_T1, s->A0);
> +    gen_op_ld_v(s, ot, s->T1, s->A0);
>      gen_string_movl_A0_ESI(s);
>      gen_op(s, OP_CMPL, ot, OR_TMP0);
>      gen_op_movl_T0_Dshift(s, ot);
> @@ -1274,11 +1273,11 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>      case OP_ADCL:
>          gen_compute_eflags_c(s1, cpu_tmp4);
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_add_tl(s1->T0, cpu_tmp4, cpu_T1);
> +            tcg_gen_add_tl(s1->T0, cpu_tmp4, s1->T1);
>              tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_add_tl(s1->T0, s1->T0, cpu_T1);
> +            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
>              tcg_gen_add_tl(s1->T0, s1->T0, cpu_tmp4);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> @@ -1288,12 +1287,12 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>      case OP_SBBL:
>          gen_compute_eflags_c(s1, cpu_tmp4);
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_add_tl(s1->T0, cpu_T1, cpu_tmp4);
> +            tcg_gen_add_tl(s1->T0, s1->T1, cpu_tmp4);
>              tcg_gen_neg_tl(s1->T0, s1->T0);
>              tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_T1);
> +            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
>              tcg_gen_sub_tl(s1->T0, s1->T0, cpu_tmp4);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
> @@ -1302,10 +1301,10 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>          break;
>      case OP_ADDL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, cpu_T1,
> +            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_add_tl(s1->T0, s1->T0, cpu_T1);
> +            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
>          gen_op_update2_cc(s1);
> @@ -1313,13 +1312,13 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>          break;
>      case OP_SUBL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_neg_tl(s1->T0, cpu_T1);
> +            tcg_gen_neg_tl(s1->T0, s1->T1);
>              tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
>                                          s1->mem_index, ot | MO_LE);
> -            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, cpu_T1);
> +            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
>          } else {
>              tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
> -            tcg_gen_sub_tl(s1->T0, s1->T0, cpu_T1);
> +            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
>          gen_op_update2_cc(s1);
> @@ -1328,10 +1327,10 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>      default:
>      case OP_ANDL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, cpu_T1,
> +            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_and_tl(s1->T0, s1->T0, cpu_T1);
> +            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
>          gen_op_update1_cc(s1);
> @@ -1339,10 +1338,10 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>          break;
>      case OP_ORL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, cpu_T1,
> +            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
>                                         s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_or_tl(s1->T0, s1->T0, cpu_T1);
> +            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
>          gen_op_update1_cc(s1);
> @@ -1350,19 +1349,19 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>          break;
>      case OP_XORL:
>          if (s1->prefix & PREFIX_LOCK) {
> -            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, cpu_T1,
> +            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
>                                          s1->mem_index, ot | MO_LE);
>          } else {
> -            tcg_gen_xor_tl(s1->T0, s1->T0, cpu_T1);
> +            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
>              gen_op_st_rm_T0_A0(s1, ot, d);
>          }
>          gen_op_update1_cc(s1);
>          set_cc_op(s1, CC_OP_LOGICB + ot);
>          break;
>      case OP_CMPL:
> -        tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
> +        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
>          tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
> -        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, cpu_T1);
> +        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
>          set_cc_op(s1, CC_OP_SUBB + ot);
>          break;
>      }
> @@ -1447,28 +1446,28 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>          gen_op_mov_v_reg(ot, s->T0, op1);
>      }
>
> -    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
> -    tcg_gen_subi_tl(cpu_tmp0, cpu_T1, 1);
> +    tcg_gen_andi_tl(s->T1, s->T1, mask);
> +    tcg_gen_subi_tl(cpu_tmp0, s->T1, 1);
>
>      if (is_right) {
>          if (is_arith) {
>              gen_exts(ot, s->T0);
>              tcg_gen_sar_tl(cpu_tmp0, s->T0, cpu_tmp0);
> -            tcg_gen_sar_tl(s->T0, s->T0, cpu_T1);
> +            tcg_gen_sar_tl(s->T0, s->T0, s->T1);
>          } else {
>              gen_extu(ot, s->T0);
>              tcg_gen_shr_tl(cpu_tmp0, s->T0, cpu_tmp0);
> -            tcg_gen_shr_tl(s->T0, s->T0, cpu_T1);
> +            tcg_gen_shr_tl(s->T0, s->T0, s->T1);
>          }
>      } else {
>          tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
> -        tcg_gen_shl_tl(s->T0, s->T0, cpu_T1);
> +        tcg_gen_shl_tl(s->T0, s->T0, s->T1);
>      }
>
>      /* store */
>      gen_op_st_rm_T0_A0(s, ot, op1);
>
> -    gen_shift_flags(s, ot, s->T0, cpu_tmp0, cpu_T1, is_right);
> +    gen_shift_flags(s, ot, s->T0, cpu_tmp0, s->T1, is_right);
>  }
>
>  static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
> @@ -1523,7 +1522,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
>          gen_op_mov_v_reg(ot, s->T0, op1);
>      }
>
> -    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
> +    tcg_gen_andi_tl(s->T1, s->T1, mask);
>
>      switch (ot) {
>      case MO_8:
> @@ -1539,7 +1538,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
>  #ifdef TARGET_X86_64
>      case MO_32:
>          tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
> -        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
> +        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
>          if (is_right) {
>              tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
>          } else {
> @@ -1550,9 +1549,9 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
>  #endif
>      default:
>          if (is_right) {
> -            tcg_gen_rotr_tl(s->T0, s->T0, cpu_T1);
> +            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
>          } else {
> -            tcg_gen_rotl_tl(s->T0, s->T0, cpu_T1);
> +            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
>          }
>          break;
>      }
> @@ -1584,7 +1583,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
>         exactly as we computed above.  */
>      t0 = tcg_const_i32(0);
>      t1 = tcg_temp_new_i32();
> -    tcg_gen_trunc_tl_i32(t1, cpu_T1);
> +    tcg_gen_trunc_tl_i32(t1, s->T1);
>      tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
>      tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
>      tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
> @@ -1689,17 +1688,17 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>      if (is_right) {
>          switch (ot) {
>          case MO_8:
> -            gen_helper_rcrb(s->T0, cpu_env, s->T0, cpu_T1);
> +            gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
>              break;
>          case MO_16:
> -            gen_helper_rcrw(s->T0, cpu_env, s->T0, cpu_T1);
> +            gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
>              break;
>          case MO_32:
> -            gen_helper_rcrl(s->T0, cpu_env, s->T0, cpu_T1);
> +            gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
>              break;
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            gen_helper_rcrq(s->T0, cpu_env, s->T0, cpu_T1);
> +            gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
>              break;
>  #endif
>          default:
> @@ -1708,17 +1707,17 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>      } else {
>          switch (ot) {
>          case MO_8:
> -            gen_helper_rclb(s->T0, cpu_env, s->T0, cpu_T1);
> +            gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
>              break;
>          case MO_16:
> -            gen_helper_rclw(s->T0, cpu_env, s->T0, cpu_T1);
> +            gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
>              break;
>          case MO_32:
> -            gen_helper_rcll(s->T0, cpu_env, s->T0, cpu_T1);
> +            gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
>              break;
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            gen_helper_rclq(s->T0, cpu_env, s->T0, cpu_T1);
> +            gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
>              break;
>  #endif
>          default:
> @@ -1752,11 +1751,11 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>             This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
>             portion by constructing it as a 32-bit value.  */
>          if (is_right) {
> -            tcg_gen_deposit_tl(cpu_tmp0, s->T0, cpu_T1, 16, 16);
> -            tcg_gen_mov_tl(cpu_T1, s->T0);
> +            tcg_gen_deposit_tl(cpu_tmp0, s->T0, s->T1, 16, 16);
> +            tcg_gen_mov_tl(s->T1, s->T0);
>              tcg_gen_mov_tl(s->T0, cpu_tmp0);
>          } else {
> -            tcg_gen_deposit_tl(cpu_T1, s->T0, cpu_T1, 16, 16);
> +            tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
>          }
>          /* FALLTHRU */
>  #ifdef TARGET_X86_64
> @@ -1764,11 +1763,11 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>          /* Concatenate the two 32-bit values and use a 64-bit shift.  */
>          tcg_gen_subi_tl(cpu_tmp0, count, 1);
>          if (is_right) {
> -            tcg_gen_concat_tl_i64(s->T0, s->T0, cpu_T1);
> +            tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
>              tcg_gen_shr_i64(cpu_tmp0, s->T0, cpu_tmp0);
>              tcg_gen_shr_i64(s->T0, s->T0, count);
>          } else {
> -            tcg_gen_concat_tl_i64(s->T0, cpu_T1, s->T0);
> +            tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
>              tcg_gen_shl_i64(cpu_tmp0, s->T0, cpu_tmp0);
>              tcg_gen_shl_i64(s->T0, s->T0, count);
>              tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
> @@ -1783,24 +1782,24 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>
>              tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
>              tcg_gen_shr_tl(s->T0, s->T0, count);
> -            tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
> +            tcg_gen_shl_tl(s->T1, s->T1, cpu_tmp4);
>          } else {
>              tcg_gen_shl_tl(cpu_tmp0, s->T0, cpu_tmp0);
>              if (ot == MO_16) {
>                  /* Only needed if count > 16, for Intel behaviour.  */
>                  tcg_gen_subfi_tl(cpu_tmp4, 33, count);
> -                tcg_gen_shr_tl(cpu_tmp4, cpu_T1, cpu_tmp4);
> +                tcg_gen_shr_tl(cpu_tmp4, s->T1, cpu_tmp4);
>                  tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
>              }
>
>              tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
>              tcg_gen_shl_tl(s->T0, s->T0, count);
> -            tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
> +            tcg_gen_shr_tl(s->T1, s->T1, cpu_tmp4);
>          }
>          tcg_gen_movi_tl(cpu_tmp4, 0);
> -        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
> -                           cpu_tmp4, cpu_T1);
> -        tcg_gen_or_tl(s->T0, s->T0, cpu_T1);
> +        tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, cpu_tmp4,
> +                           cpu_tmp4, s->T1);
> +        tcg_gen_or_tl(s->T0, s->T0, s->T1);
>          break;
>      }
>
> @@ -1814,7 +1813,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>  static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
>  {
>      if (s != OR_TMP1)
> -        gen_op_mov_v_reg(ot, cpu_T1, s);
> +        gen_op_mov_v_reg(ot, s1->T1, s);
>      switch(op) {
>      case OP_ROL:
>          gen_rot_rm_T1(s1, ot, d, 0);
> @@ -1862,7 +1861,7 @@ static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
>          break;
>      default:
>          /* currently not optimized */
> -        tcg_gen_movi_tl(cpu_T1, c);
> +        tcg_gen_movi_tl(s1->T1, c);
>          gen_shift(s1, op, ot, d, OR_TMP1);
>          break;
>      }
> @@ -2242,7 +2241,7 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
>
>      gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>
> -    cc = gen_prepare_cc(s, b, cpu_T1);
> +    cc = gen_prepare_cc(s, b, s->T1);
>      if (cc.mask != -1) {
>          TCGv t0 = tcg_temp_new();
>          tcg_gen_andi_tl(t0, cc.reg, cc.mask);
> @@ -2416,8 +2415,8 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
>      int size = 1 << d_ot;
>
>      /* Push BP; compute FrameTemp into T1.  */
> -    tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
> -    gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
> +    tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
> +    gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
>      gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
>
>      level &= 31;
> @@ -2430,23 +2429,23 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
>              gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
>              gen_op_ld_v(s, d_ot, cpu_tmp0, s->A0);
>
> -            tcg_gen_subi_tl(s->A0, cpu_T1, size * i);
> +            tcg_gen_subi_tl(s->A0, s->T1, size * i);
>              gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
>              gen_op_st_v(s, d_ot, cpu_tmp0, s->A0);
>          }
>
>          /* Push the current FrameTemp as the last level.  */
> -        tcg_gen_subi_tl(s->A0, cpu_T1, size * level);
> +        tcg_gen_subi_tl(s->A0, s->T1, size * level);
>          gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
> -        gen_op_st_v(s, d_ot, cpu_T1, s->A0);
> +        gen_op_st_v(s, d_ot, s->T1, s->A0);
>      }
>
>      /* Copy the FrameTemp value to EBP.  */
> -    gen_op_mov_reg_v(a_ot, R_EBP, cpu_T1);
> +    gen_op_mov_reg_v(a_ot, R_EBP, s->T1);
>
>      /* Compute the final value of ESP.  */
> -    tcg_gen_subi_tl(cpu_T1, cpu_T1, esp_addend + size * level);
> -    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
> +    tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
> +    gen_op_mov_reg_v(a_ot, R_ESP, s->T1);
>  }
>
>  static void gen_leave(DisasContext *s)
> @@ -2457,10 +2456,10 @@ static void gen_leave(DisasContext *s)
>      gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
>      gen_op_ld_v(s, d_ot, s->T0, s->A0);
>
> -    tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
> +    tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
>
>      gen_op_mov_reg_v(d_ot, R_EBP, s->T0);
> -    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
> +    gen_op_mov_reg_v(a_ot, R_ESP, s->T1);
>  }
>
>  static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
> @@ -3854,10 +3853,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
>                                         s->A0, bound);
>                      tcg_temp_free(bound);
> -                    tcg_gen_movi_tl(cpu_T1, 1);
> -                    tcg_gen_shl_tl(cpu_T1, cpu_T1, s->A0);
> -                    tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
> -                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
> +                    tcg_gen_movi_tl(s->T1, 1);
> +                    tcg_gen_shl_tl(s->T1, s->T1, s->A0);
> +                    tcg_gen_subi_tl(s->T1, s->T1, 1);
> +                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
>
>                      gen_op_mov_reg_v(ot, reg, s->T0);
>                      gen_op_update1_cc(s);
> @@ -3873,19 +3872,19 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  }
>                  ot = mo_64_32(s->dflag);
>                  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> -                tcg_gen_ext8u_tl(cpu_T1, cpu_regs[s->vex_v]);
> +                tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
>                  {
>                      TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
>                      /* Note that since we're using BMILG (in order to get O
>                         cleared) we need to store the inverse into C.  */
>                      tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
> -                                       cpu_T1, bound);
> -                    tcg_gen_movcond_tl(TCG_COND_GT, cpu_T1, cpu_T1,
> -                                       bound, bound, cpu_T1);
> +                                       s->T1, bound);
> +                    tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
> +                                       bound, bound, s->T1);
>                      tcg_temp_free(bound);
>                  }
>                  tcg_gen_movi_tl(s->A0, -1);
> -                tcg_gen_shl_tl(s->A0, s->A0, cpu_T1);
> +                tcg_gen_shl_tl(s->A0, s->A0, s->T1);
>                  tcg_gen_andc_tl(s->T0, s->T0, s->A0);
>                  gen_op_mov_reg_v(ot, reg, s->T0);
>                  gen_op_update1_cc(s);
> @@ -3911,10 +3910,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      break;
>  #ifdef TARGET_X86_64
>                  case MO_64:
> -                    tcg_gen_mulu2_i64(s->T0, cpu_T1,
> +                    tcg_gen_mulu2_i64(s->T0, s->T1,
>                                        s->T0, cpu_regs[R_EDX]);
>                      tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
> -                    tcg_gen_mov_i64(cpu_regs[reg], cpu_T1);
> +                    tcg_gen_mov_i64(cpu_regs[reg], s->T1);
>                      break;
>  #endif
>                  }
> @@ -3931,11 +3930,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  /* Note that by zero-extending the mask operand, we
>                     automatically handle zero-extending the result.  */
>                  if (ot == MO_64) {
> -                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
> +                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
>                  } else {
> -                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
> +                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
>                  }
> -                gen_helper_pdep(cpu_regs[reg], s->T0, cpu_T1);
> +                gen_helper_pdep(cpu_regs[reg], s->T0, s->T1);
>                  break;
>
>              case 0x2f5: /* pext Gy, By, Ey */
> @@ -3949,11 +3948,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  /* Note that by zero-extending the mask operand, we
>                     automatically handle zero-extending the result.  */
>                  if (ot == MO_64) {
> -                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
> +                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
>                  } else {
> -                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
> +                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
>                  }
> -                gen_helper_pext(cpu_regs[reg], s->T0, cpu_T1);
> +                gen_helper_pext(cpu_regs[reg], s->T0, s->T1);
>                  break;
>
>              case 0x1f6: /* adcx Gy, Ey */
> @@ -4045,22 +4044,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  ot = mo_64_32(s->dflag);
>                  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>                  if (ot == MO_64) {
> -                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 63);
> +                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
>                  } else {
> -                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 31);
> +                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
>                  }
>                  if (b == 0x1f7) {
> -                    tcg_gen_shl_tl(s->T0, s->T0, cpu_T1);
> +                    tcg_gen_shl_tl(s->T0, s->T0, s->T1);
>                  } else if (b == 0x2f7) {
>                      if (ot != MO_64) {
>                          tcg_gen_ext32s_tl(s->T0, s->T0);
>                      }
> -                    tcg_gen_sar_tl(s->T0, s->T0, cpu_T1);
> +                    tcg_gen_sar_tl(s->T0, s->T0, s->T1);
>                  } else {
>                      if (ot != MO_64) {
>                          tcg_gen_ext32u_tl(s->T0, s->T0);
>                      }
> -                    tcg_gen_shr_tl(s->T0, s->T0, cpu_T1);
> +                    tcg_gen_shr_tl(s->T0, s->T0, s->T1);
>                  }
>                  gen_op_mov_reg_v(ot, reg, s->T0);
>                  break;
> @@ -4080,16 +4079,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  tcg_gen_mov_tl(cpu_cc_src, s->T0);
>                  switch (reg & 7) {
>                  case 1: /* blsr By,Ey */
> -                    tcg_gen_subi_tl(cpu_T1, s->T0, 1);
> -                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
> +                    tcg_gen_subi_tl(s->T1, s->T0, 1);
> +                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
>                      break;
>                  case 2: /* blsmsk By,Ey */
> -                    tcg_gen_subi_tl(cpu_T1, s->T0, 1);
> -                    tcg_gen_xor_tl(s->T0, s->T0, cpu_T1);
> +                    tcg_gen_subi_tl(s->T1, s->T0, 1);
> +                    tcg_gen_xor_tl(s->T0, s->T0, s->T1);
>                      break;
>                  case 3: /* blsi By, Ey */
> -                    tcg_gen_neg_tl(cpu_T1, s->T0);
> -                    tcg_gen_and_tl(s->T0, s->T0, cpu_T1);
> +                    tcg_gen_neg_tl(s->T1, s->T0);
> +                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
>                      break;
>                  default:
>                      goto unknown_op;
> @@ -4677,7 +4676,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  } else {
>                      opreg = rm;
>                  }
> -                gen_op_mov_v_reg(ot, cpu_T1, reg);
> +                gen_op_mov_v_reg(ot, s->T1, reg);
>                  gen_op(s, op, ot, opreg);
>                  break;
>              case 1: /* OP Gv, Ev */
> @@ -4687,17 +4686,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  rm = (modrm & 7) | REX_B(s);
>                  if (mod != 3) {
>                      gen_lea_modrm(env, s, modrm);
> -                    gen_op_ld_v(s, ot, cpu_T1, s->A0);
> +                    gen_op_ld_v(s, ot, s->T1, s->A0);
>                  } else if (op == OP_XORL && rm == reg) {
>                      goto xor_zero;
>                  } else {
> -                    gen_op_mov_v_reg(ot, cpu_T1, rm);
> +                    gen_op_mov_v_reg(ot, s->T1, rm);
>                  }
>                  gen_op(s, op, ot, reg);
>                  break;
>              case 2: /* OP A, Iv */
>                  val = insn_get(env, s, ot);
> -                tcg_gen_movi_tl(cpu_T1, val);
> +                tcg_gen_movi_tl(s->T1, val);
>                  gen_op(s, op, ot, OR_EAX);
>                  break;
>              }
> @@ -4743,7 +4742,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  val = (int8_t)insn_get(env, s, MO_8);
>                  break;
>              }
> -            tcg_gen_movi_tl(cpu_T1, val);
> +            tcg_gen_movi_tl(s->T1, val);
>              gen_op(s, op, ot, opreg);
>          }
>          break;
> @@ -4783,7 +4782,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          switch(op) {
>          case 0: /* test */
>              val = insn_get(env, s, ot);
> -            tcg_gen_movi_tl(cpu_T1, val);
> +            tcg_gen_movi_tl(s->T1, val);
>              gen_op_testl_T0_T1_cc(s);
>              set_cc_op(s, CC_OP_LOGICB + ot);
>              break;
> @@ -4847,22 +4846,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          case 4: /* mul */
>              switch(ot) {
>              case MO_8:
> -                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
> +                gen_op_mov_v_reg(MO_8, s->T1, R_EAX);
>                  tcg_gen_ext8u_tl(s->T0, s->T0);
> -                tcg_gen_ext8u_tl(cpu_T1, cpu_T1);
> +                tcg_gen_ext8u_tl(s->T1, s->T1);
>                  /* XXX: use 32 bit mul which could be faster */
> -                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
>                  gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
>                  tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>                  tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
>                  set_cc_op(s, CC_OP_MULB);
>                  break;
>              case MO_16:
> -                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
> +                gen_op_mov_v_reg(MO_16, s->T1, R_EAX);
>                  tcg_gen_ext16u_tl(s->T0, s->T0);
> -                tcg_gen_ext16u_tl(cpu_T1, cpu_T1);
> +                tcg_gen_ext16u_tl(s->T1, s->T1);
>                  /* XXX: use 32 bit mul which could be faster */
> -                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
>                  gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
>                  tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>                  tcg_gen_shri_tl(s->T0, s->T0, 16);
> @@ -4896,11 +4895,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          case 5: /* imul */
>              switch(ot) {
>              case MO_8:
> -                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
> +                gen_op_mov_v_reg(MO_8, s->T1, R_EAX);
>                  tcg_gen_ext8s_tl(s->T0, s->T0);
> -                tcg_gen_ext8s_tl(cpu_T1, cpu_T1);
> +                tcg_gen_ext8s_tl(s->T1, s->T1);
>                  /* XXX: use 32 bit mul which could be faster */
> -                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
>                  gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
>                  tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>                  tcg_gen_ext8s_tl(cpu_tmp0, s->T0);
> @@ -4908,11 +4907,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  set_cc_op(s, CC_OP_MULB);
>                  break;
>              case MO_16:
> -                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
> +                gen_op_mov_v_reg(MO_16, s->T1, R_EAX);
>                  tcg_gen_ext16s_tl(s->T0, s->T0);
> -                tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
> +                tcg_gen_ext16s_tl(s->T1, s->T1);
>                  /* XXX: use 32 bit mul which could be faster */
> -                tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
>                  gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
>                  tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>                  tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
> @@ -5041,25 +5040,25 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  tcg_gen_ext16u_tl(s->T0, s->T0);
>              }
>              next_eip = s->pc - s->cs_base;
> -            tcg_gen_movi_tl(cpu_T1, next_eip);
> -            gen_push_v(s, cpu_T1);
> +            tcg_gen_movi_tl(s->T1, next_eip);
> +            gen_push_v(s, s->T1);
>              gen_op_jmp_v(s->T0);
>              gen_bnd_jmp(s);
>              gen_jr(s, s->T0);
>              break;
>          case 3: /* lcall Ev */
> -            gen_op_ld_v(s, ot, cpu_T1, s->A0);
> +            gen_op_ld_v(s, ot, s->T1, s->A0);
>              gen_add_A0_im(s, 1 << ot);
>              gen_op_ld_v(s, MO_16, s->T0, s->A0);
>          do_lcall:
>              if (s->pe && !s->vm86) {
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
> -                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
> +                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, s->T1,
>                                             tcg_const_i32(dflag - 1),
>                                             tcg_const_tl(s->pc - s->cs_base));
>              } else {
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
> -                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T1,
> +                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, s->T1,
>                                        tcg_const_i32(dflag - 1),
>                                        tcg_const_i32(s->pc - s->cs_base));
>              }
> @@ -5075,17 +5074,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_jr(s, s->T0);
>              break;
>          case 5: /* ljmp Ev */
> -            gen_op_ld_v(s, ot, cpu_T1, s->A0);
> +            gen_op_ld_v(s, ot, s->T1, s->A0);
>              gen_add_A0_im(s, 1 << ot);
>              gen_op_ld_v(s, MO_16, s->T0, s->A0);
>          do_ljmp:
>              if (s->pe && !s->vm86) {
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
> -                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
> +                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, s->T1,
>                                            tcg_const_tl(s->pc - s->cs_base));
>              } else {
>                  gen_op_movl_seg_T0_vm(s, R_CS);
> -                gen_op_jmp_v(cpu_T1);
> +                gen_op_jmp_v(s->T1);
>              }
>              tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
>              gen_jr(s, cpu_tmp4);
> @@ -5106,7 +5105,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          reg = ((modrm >> 3) & 7) | rex_r;
>
>          gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> -        gen_op_mov_v_reg(ot, cpu_T1, reg);
> +        gen_op_mov_v_reg(ot, s->T1, reg);
>          gen_op_testl_T0_T1_cc(s);
>          set_cc_op(s, CC_OP_LOGICB + ot);
>          break;
> @@ -5117,7 +5116,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          val = insn_get(env, s, ot);
>
>          gen_op_mov_v_reg(ot, s->T0, OR_EAX);
> -        tcg_gen_movi_tl(cpu_T1, val);
> +        tcg_gen_movi_tl(s->T1, val);
>          gen_op_testl_T0_T1_cc(s);
>          set_cc_op(s, CC_OP_LOGICB + ot);
>          break;
> @@ -5183,25 +5182,25 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>          if (b == 0x69) {
>              val = insn_get(env, s, ot);
> -            tcg_gen_movi_tl(cpu_T1, val);
> +            tcg_gen_movi_tl(s->T1, val);
>          } else if (b == 0x6b) {
>              val = (int8_t)insn_get(env, s, MO_8);
> -            tcg_gen_movi_tl(cpu_T1, val);
> +            tcg_gen_movi_tl(s->T1, val);
>          } else {
> -            gen_op_mov_v_reg(ot, cpu_T1, reg);
> +            gen_op_mov_v_reg(ot, s->T1, reg);
>          }
>          switch (ot) {
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, s->T0, cpu_T1);
> +            tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
>              tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
>              tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
> -            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T1);
> +            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
>              break;
>  #endif
>          case MO_32:
>              tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
> -            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
> +            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
>              tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
>                                cpu_tmp2_i32, cpu_tmp3_i32);
>              tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
> @@ -5212,9 +5211,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              break;
>          default:
>              tcg_gen_ext16s_tl(s->T0, s->T0);
> -            tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
> +            tcg_gen_ext16s_tl(s->T1, s->T1);
>              /* XXX: use 32 bit mul which could be faster */
> -            tcg_gen_mul_tl(s->T0, s->T0, cpu_T1);
> +            tcg_gen_mul_tl(s->T0, s->T0, s->T1);
>              tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>              tcg_gen_ext16s_tl(cpu_tmp0, s->T0);
>              tcg_gen_sub_tl(cpu_cc_src, s->T0, cpu_tmp0);
> @@ -5232,22 +5231,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          gen_op_mov_v_reg(ot, s->T0, reg);
>          if (mod == 3) {
>              rm = (modrm & 7) | REX_B(s);
> -            gen_op_mov_v_reg(ot, cpu_T1, rm);
> -            tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
> -            gen_op_mov_reg_v(ot, reg, cpu_T1);
> +            gen_op_mov_v_reg(ot, s->T1, rm);
> +            tcg_gen_add_tl(s->T0, s->T0, s->T1);
> +            gen_op_mov_reg_v(ot, reg, s->T1);
>              gen_op_mov_reg_v(ot, rm, s->T0);
>          } else {
>              gen_lea_modrm(env, s, modrm);
>              if (s->prefix & PREFIX_LOCK) {
> -                tcg_gen_atomic_fetch_add_tl(cpu_T1, s->A0, s->T0,
> +                tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
>                                              s->mem_index, ot | MO_LE);
> -                tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
> +                tcg_gen_add_tl(s->T0, s->T0, s->T1);
>              } else {
> -                gen_op_ld_v(s, ot, cpu_T1, s->A0);
> -                tcg_gen_add_tl(s->T0, s->T0, cpu_T1);
> +                gen_op_ld_v(s, ot, s->T1, s->A0);
> +                tcg_gen_add_tl(s->T0, s->T0, s->T1);
>                  gen_op_st_v(s, ot, s->T0, s->A0);
>              }
> -            gen_op_mov_reg_v(ot, reg, cpu_T1);
> +            gen_op_mov_reg_v(ot, reg, s->T1);
>          }
>          gen_op_update2_cc(s);
>          set_cc_op(s, CC_OP_ADDB + ot);
> @@ -5653,16 +5652,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              rm = (modrm & 7) | REX_B(s);
>          do_xchg_reg:
>              gen_op_mov_v_reg(ot, s->T0, reg);
> -            gen_op_mov_v_reg(ot, cpu_T1, rm);
> +            gen_op_mov_v_reg(ot, s->T1, rm);
>              gen_op_mov_reg_v(ot, rm, s->T0);
> -            gen_op_mov_reg_v(ot, reg, cpu_T1);
> +            gen_op_mov_reg_v(ot, reg, s->T1);
>          } else {
>              gen_lea_modrm(env, s, modrm);
>              gen_op_mov_v_reg(ot, s->T0, reg);
>              /* for xchg, lock is implicit */
> -            tcg_gen_atomic_xchg_tl(cpu_T1, s->A0, s->T0,
> +            tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
>                                     s->mem_index, ot | MO_LE);
> -            gen_op_mov_reg_v(ot, reg, cpu_T1);
> +            gen_op_mov_reg_v(ot, reg, s->T1);
>          }
>          break;
>      case 0xc4: /* les Gv */
> @@ -5689,13 +5688,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          if (mod == 3)
>              goto illegal_op;
>          gen_lea_modrm(env, s, modrm);
> -        gen_op_ld_v(s, ot, cpu_T1, s->A0);
> +        gen_op_ld_v(s, ot, s->T1, s->A0);
>          gen_add_A0_im(s, 1 << ot);
>          /* load the segment first to handle exceptions properly */
>          gen_op_ld_v(s, MO_16, s->T0, s->A0);
>          gen_movl_seg_T0(s, op);
>          /* then put the data */
> -        gen_op_mov_reg_v(ot, reg, cpu_T1);
> +        gen_op_mov_reg_v(ot, reg, s->T1);
>          if (s->base.is_jmp) {
>              gen_jmp_im(s->pc - s->cs_base);
>              gen_eob(s);
> @@ -5774,7 +5773,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          } else {
>              opreg = rm;
>          }
> -        gen_op_mov_v_reg(ot, cpu_T1, reg);
> +        gen_op_mov_v_reg(ot, s->T1, reg);
>
>          if (shift) {
>              TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
> @@ -6387,8 +6386,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_io_start();
>  	}
>          tcg_gen_movi_i32(cpu_tmp2_i32, val);
> -        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
> -        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
> +        gen_helper_in_func(ot, s->T1, cpu_tmp2_i32);
> +        gen_op_mov_reg_v(ot, R_EAX, s->T1);
>          gen_bpt_io(s, cpu_tmp2_i32, ot);
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_end();
> @@ -6402,13 +6401,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          tcg_gen_movi_tl(s->T0, val);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       svm_is_rep(prefixes));
> -        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
> +        gen_op_mov_v_reg(ot, s->T1, R_EAX);
>
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_start();
>  	}
>          tcg_gen_movi_i32(cpu_tmp2_i32, val);
> -        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
> +        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
>          gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
>          gen_bpt_io(s, cpu_tmp2_i32, ot);
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
> @@ -6426,8 +6425,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_io_start();
>  	}
>          tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
> -        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
> -        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
> +        gen_helper_in_func(ot, s->T1, cpu_tmp2_i32);
> +        gen_op_mov_reg_v(ot, R_EAX, s->T1);
>          gen_bpt_io(s, cpu_tmp2_i32, ot);
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_end();
> @@ -6440,13 +6439,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       svm_is_rep(prefixes));
> -        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
> +        gen_op_mov_v_reg(ot, s->T1, R_EAX);
>
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_start();
>  	}
>          tcg_gen_trunc_tl_i32(cpu_tmp2_i32, s->T0);
> -        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
> +        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, s->T1);
>          gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
>          gen_bpt_io(s, cpu_tmp2_i32, ot);
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
> @@ -6552,7 +6551,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              selector = insn_get(env, s, MO_16);
>
>              tcg_gen_movi_tl(s->T0, selector);
> -            tcg_gen_movi_tl(cpu_T1, offset);
> +            tcg_gen_movi_tl(s->T1, offset);
>          }
>          goto do_lcall;
>      case 0xe9: /* jmp im */
> @@ -6581,7 +6580,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              selector = insn_get(env, s, MO_16);
>
>              tcg_gen_movi_tl(s->T0, selector);
> -            tcg_gen_movi_tl(cpu_T1, offset);
> +            tcg_gen_movi_tl(s->T1, offset);
>          }
>          goto do_ljmp;
>      case 0xeb: /* jmp Jb */
> @@ -6753,7 +6752,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          }
>          /* load shift */
>          val = x86_ldub_code(env, s);
> -        tcg_gen_movi_tl(cpu_T1, val);
> +        tcg_gen_movi_tl(s->T1, val);
>          if (op < 4)
>              goto unknown_op;
>          op -= 4;
> @@ -6775,12 +6774,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          reg = ((modrm >> 3) & 7) | rex_r;
>          mod = (modrm >> 6) & 3;
>          rm = (modrm & 7) | REX_B(s);
> -        gen_op_mov_v_reg(MO_32, cpu_T1, reg);
> +        gen_op_mov_v_reg(MO_32, s->T1, reg);
>          if (mod != 3) {
>              AddressParts a = gen_lea_modrm_0(env, s, modrm);
>              /* specific case: we need to add a displacement */
> -            gen_exts(ot, cpu_T1);
> -            tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
> +            gen_exts(ot, s->T1);
> +            tcg_gen_sari_tl(cpu_tmp0, s->T1, 3 + ot);
>              tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
>              tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), cpu_tmp0);
>              gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
> @@ -6791,9 +6790,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_op_mov_v_reg(ot, s->T0, rm);
>          }
>      bt_op:
> -        tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
> +        tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
>          tcg_gen_movi_tl(cpu_tmp0, 1);
> -        tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
> +        tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, s->T1);
>          if (s->prefix & PREFIX_LOCK) {
>              switch (op) {
>              case 0: /* bt */
> @@ -6816,9 +6815,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                                              s->mem_index, ot | MO_LE);
>                  break;
>              }
> -            tcg_gen_shr_tl(cpu_tmp4, s->T0, cpu_T1);
> +            tcg_gen_shr_tl(cpu_tmp4, s->T0, s->T1);
>          } else {
> -            tcg_gen_shr_tl(cpu_tmp4, s->T0, cpu_T1);
> +            tcg_gen_shr_tl(cpu_tmp4, s->T0, s->T1);
>              switch (op) {
>              case 0: /* bt */
>                  /* Data already loaded; nothing to do.  */
> @@ -6914,8 +6913,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              if (b & 1) {
>                  /* For bsr, return the bit index of the first 1 bit,
>                     not the count of leading zeros.  */
> -                tcg_gen_xori_tl(cpu_T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
> -                tcg_gen_clz_tl(s->T0, s->T0, cpu_T1);
> +                tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
> +                tcg_gen_clz_tl(s->T0, s->T0, s->T1);
>                  tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
>              } else {
>                  tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
> @@ -7512,14 +7511,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
>              gen_lea_modrm(env, s, modrm);
> -            gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
> +            gen_op_ld_v(s, MO_16, s->T1, s->A0);
>              gen_add_A0_im(s, 2);
>              gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
>              if (dflag == MO_16) {
>                  tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
>              }
>              tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
> -            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit));
> +            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
>              break;
>
>          CASE_MODRM_MEM_OP(3): /* lidt */
> @@ -7529,14 +7528,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
>              gen_lea_modrm(env, s, modrm);
> -            gen_op_ld_v(s, MO_16, cpu_T1, s->A0);
> +            gen_op_ld_v(s, MO_16, s->T1, s->A0);
>              gen_add_A0_im(s, 2);
>              gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
>              if (dflag == MO_16) {
>                  tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
>              }
>              tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
> -            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit));
> +            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
>              break;
>
>          CASE_MODRM_OP(4): /* smsw */
> @@ -8471,7 +8470,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
>  #endif
>
>      dc->T0 = tcg_temp_new();
> -    cpu_T1 = tcg_temp_new();
> +    dc->T1 = tcg_temp_new();
>      dc->A0 = tcg_temp_new();
>
>      cpu_tmp0 = tcg_temp_new();


--
Alex Bennée

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [Qemu-devel] [PATCH v3 12/13] target/i386: move x86_64_hregs to DisasContext
  2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 12/13] target/i386: move x86_64_hregs " Emilio G. Cota
  2018-09-11 20:58   ` Richard Henderson
@ 2018-09-13 14:31   ` Alex Bennée
  1 sibling, 0 replies; 32+ messages in thread
From: Alex Bennée @ 2018-09-13 14:31 UTC (permalink / raw)
  To: Emilio G. Cota
  Cc: qemu-devel, Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost


Emilio G. Cota <cota@braap.org> writes:

> And convert it to a bool to use an existing hole
> in the struct.
>
> Signed-off-by: Emilio G. Cota <cota@braap.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  target/i386/translate.c | 307 ++++++++++++++++++++--------------------
>  1 file changed, 154 insertions(+), 153 deletions(-)
>
> diff --git a/target/i386/translate.c b/target/i386/translate.c
> index 61a98ef872..b8222dc4ba 100644
> --- a/target/i386/translate.c
> +++ b/target/i386/translate.c
> @@ -81,10 +81,6 @@ static TCGv_i64 cpu_bndu[4];
>
>  #include "exec/gen-icount.h"
>
> -#ifdef TARGET_X86_64
> -static int x86_64_hregs;
> -#endif
> -
>  typedef struct DisasContext {
>      DisasContextBase base;
>
> @@ -109,6 +105,9 @@ typedef struct DisasContext {
>      int ss32;   /* 32 bit stack segment */
>      CCOp cc_op;  /* current CC operation */
>      bool cc_op_dirty;
> +#ifdef TARGET_X86_64
> +    bool x86_64_hregs;
> +#endif
>      int addseg; /* non zero if either DS/ES/SS have a non zero base */
>      int f_st;   /* currently unused */
>      int vm86;   /* vm86 mode */
> @@ -307,13 +306,13 @@ static void gen_update_cc_op(DisasContext *s)
>   * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
>   * true for this special case, false otherwise.
>   */
> -static inline bool byte_reg_is_xH(int reg)
> +static inline bool byte_reg_is_xH(DisasContext *s, int reg)
>  {
>      if (reg < 4) {
>          return false;
>      }
>  #ifdef TARGET_X86_64
> -    if (reg >= 8 || x86_64_hregs) {
> +    if (reg >= 8 || s->x86_64_hregs) {
>          return false;
>      }
>  #endif
> @@ -360,11 +359,11 @@ static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
>      return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
>  }
>
> -static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
> +static void gen_op_mov_reg_v(DisasContext *s, TCGMemOp ot, int reg, TCGv t0)
>  {
>      switch(ot) {
>      case MO_8:
> -        if (!byte_reg_is_xH(reg)) {
> +        if (!byte_reg_is_xH(s, reg)) {
>              tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
>          } else {
>              tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
> @@ -388,9 +387,10 @@ static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
>      }
>  }
>
> -static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
> +static inline
> +void gen_op_mov_v_reg(DisasContext *s, TCGMemOp ot, TCGv t0, int reg)
>  {
> -    if (ot == MO_8 && byte_reg_is_xH(reg)) {
> +    if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
>          tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
>      } else {
>          tcg_gen_mov_tl(t0, cpu_regs[reg]);
> @@ -414,13 +414,13 @@ static inline
>  void gen_op_add_reg_im(DisasContext *s, TCGMemOp size, int reg, int32_t val)
>  {
>      tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
> -    gen_op_mov_reg_v(size, reg, s->tmp0);
> +    gen_op_mov_reg_v(s, size, reg, s->tmp0);
>  }
>
>  static inline void gen_op_add_reg_T0(DisasContext *s, TCGMemOp size, int reg)
>  {
>      tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
> -    gen_op_mov_reg_v(size, reg, s->tmp0);
> +    gen_op_mov_reg_v(s, size, reg, s->tmp0);
>  }
>
>  static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
> @@ -438,7 +438,7 @@ static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
>      if (d == OR_TMP0) {
>          gen_op_st_v(s, idx, s->T0, s->A0);
>      } else {
> -        gen_op_mov_reg_v(idx, d, s->T0);
> +        gen_op_mov_reg_v(s, idx, d, s->T0);
>      }
>  }
>
> @@ -1077,7 +1077,7 @@ static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
>
>  static inline void gen_stos(DisasContext *s, TCGMemOp ot)
>  {
> -    gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
> +    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
>      gen_string_movl_A0_EDI(s);
>      gen_op_st_v(s, ot, s->T0, s->A0);
>      gen_op_movl_T0_Dshift(s, ot);
> @@ -1088,7 +1088,7 @@ static inline void gen_lods(DisasContext *s, TCGMemOp ot)
>  {
>      gen_string_movl_A0_ESI(s);
>      gen_op_ld_v(s, ot, s->T0, s->A0);
> -    gen_op_mov_reg_v(ot, R_EAX, s->T0);
> +    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
>      gen_op_movl_T0_Dshift(s, ot);
>      gen_op_add_reg_T0(s, s->aflag, R_ESI);
>  }
> @@ -1272,7 +1272,7 @@ static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
>  static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
>  {
>      if (d != OR_TMP0) {
> -        gen_op_mov_v_reg(ot, s1->T0, d);
> +        gen_op_mov_v_reg(s1, ot, s1->T0, d);
>      } else if (!(s1->prefix & PREFIX_LOCK)) {
>          gen_op_ld_v(s1, ot, s1->T0, s1->A0);
>      }
> @@ -1383,7 +1383,7 @@ static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
>                                      s1->mem_index, ot | MO_LE);
>      } else {
>          if (d != OR_TMP0) {
> -            gen_op_mov_v_reg(ot, s1->T0, d);
> +            gen_op_mov_v_reg(s1, ot, s1->T0, d);
>          } else {
>              gen_op_ld_v(s1, ot, s1->T0, s1->A0);
>          }
> @@ -1450,7 +1450,7 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>      if (op1 == OR_TMP0) {
>          gen_op_ld_v(s, ot, s->T0, s->A0);
>      } else {
> -        gen_op_mov_v_reg(ot, s->T0, op1);
> +        gen_op_mov_v_reg(s, ot, s->T0, op1);
>      }
>
>      tcg_gen_andi_tl(s->T1, s->T1, mask);
> @@ -1486,7 +1486,7 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>      if (op1 == OR_TMP0)
>          gen_op_ld_v(s, ot, s->T0, s->A0);
>      else
> -        gen_op_mov_v_reg(ot, s->T0, op1);
> +        gen_op_mov_v_reg(s, ot, s->T0, op1);
>
>      op2 &= mask;
>      if (op2 != 0) {
> @@ -1526,7 +1526,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
>      if (op1 == OR_TMP0) {
>          gen_op_ld_v(s, ot, s->T0, s->A0);
>      } else {
> -        gen_op_mov_v_reg(ot, s->T0, op1);
> +        gen_op_mov_v_reg(s, ot, s->T0, op1);
>      }
>
>      tcg_gen_andi_tl(s->T1, s->T1, mask);
> @@ -1612,7 +1612,7 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
>      if (op1 == OR_TMP0) {
>          gen_op_ld_v(s, ot, s->T0, s->A0);
>      } else {
> -        gen_op_mov_v_reg(ot, s->T0, op1);
> +        gen_op_mov_v_reg(s, ot, s->T0, op1);
>      }
>
>      op2 &= mask;
> @@ -1690,7 +1690,7 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>      if (op1 == OR_TMP0)
>          gen_op_ld_v(s, ot, s->T0, s->A0);
>      else
> -        gen_op_mov_v_reg(ot, s->T0, op1);
> +        gen_op_mov_v_reg(s, ot, s->T0, op1);
>
>      if (is_right) {
>          switch (ot) {
> @@ -1746,7 +1746,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>      if (op1 == OR_TMP0) {
>          gen_op_ld_v(s, ot, s->T0, s->A0);
>      } else {
> -        gen_op_mov_v_reg(ot, s->T0, op1);
> +        gen_op_mov_v_reg(s, ot, s->T0, op1);
>      }
>
>      count = tcg_temp_new();
> @@ -1820,7 +1820,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
>  static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
>  {
>      if (s != OR_TMP1)
> -        gen_op_mov_v_reg(ot, s1->T1, s);
> +        gen_op_mov_v_reg(s1, ot, s1->T1, s);
>      switch(op) {
>      case OP_ROL:
>          gen_rot_rm_T1(s1, ot, d, 0);
> @@ -2133,23 +2133,23 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
>      if (mod == 3) {
>          if (is_store) {
>              if (reg != OR_TMP0)
> -                gen_op_mov_v_reg(ot, s->T0, reg);
> -            gen_op_mov_reg_v(ot, rm, s->T0);
> +                gen_op_mov_v_reg(s, ot, s->T0, reg);
> +            gen_op_mov_reg_v(s, ot, rm, s->T0);
>          } else {
> -            gen_op_mov_v_reg(ot, s->T0, rm);
> +            gen_op_mov_v_reg(s, ot, s->T0, rm);
>              if (reg != OR_TMP0)
> -                gen_op_mov_reg_v(ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, ot, reg, s->T0);
>          }
>      } else {
>          gen_lea_modrm(env, s, modrm);
>          if (is_store) {
>              if (reg != OR_TMP0)
> -                gen_op_mov_v_reg(ot, s->T0, reg);
> +                gen_op_mov_v_reg(s, ot, s->T0, reg);
>              gen_op_st_v(s, ot, s->T0, s->A0);
>          } else {
>              gen_op_ld_v(s, ot, s->T0, s->A0);
>              if (reg != OR_TMP0)
> -                gen_op_mov_reg_v(ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, ot, reg, s->T0);
>          }
>      }
>  }
> @@ -2260,7 +2260,7 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
>
>      tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
>                         s->T0, cpu_regs[reg]);
> -    gen_op_mov_reg_v(ot, reg, s->T0);
> +    gen_op_mov_reg_v(s, ot, reg, s->T0);
>
>      if (cc.mask != -1) {
>          tcg_temp_free(cc.reg);
> @@ -2354,7 +2354,7 @@ static void gen_push_v(DisasContext *s, TCGv val)
>      }
>
>      gen_op_st_v(s, d_ot, val, s->A0);
> -    gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
> +    gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
>  }
>
>  /* two step pop is necessary for precise exceptions */
> @@ -2409,7 +2409,7 @@ static void gen_popa(DisasContext *s)
>          tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
>          gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
>          gen_op_ld_v(s, d_ot, s->T0, s->A0);
> -        gen_op_mov_reg_v(d_ot, 7 - i, s->T0);
> +        gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
>      }
>
>      gen_stack_update(s, 8 * size);
> @@ -2448,11 +2448,11 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
>      }
>
>      /* Copy the FrameTemp value to EBP.  */
> -    gen_op_mov_reg_v(a_ot, R_EBP, s->T1);
> +    gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
>
>      /* Compute the final value of ESP.  */
>      tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
> -    gen_op_mov_reg_v(a_ot, R_ESP, s->T1);
> +    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
>  }
>
>  static void gen_leave(DisasContext *s)
> @@ -2465,8 +2465,8 @@ static void gen_leave(DisasContext *s)
>
>      tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
>
> -    gen_op_mov_reg_v(d_ot, R_EBP, s->T0);
> -    gen_op_mov_reg_v(a_ot, R_ESP, s->T1);
> +    gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
> +    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
>  }
>
>  static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
> @@ -3598,7 +3598,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  goto illegal_op;
>  #endif
>              }
> -            gen_op_mov_reg_v(ot, reg, s->T0);
> +            gen_op_mov_reg_v(s, ot, reg, s->T0);
>              break;
>          case 0xc4: /* pinsrw */
>          case 0x1c4:
> @@ -3633,7 +3633,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                                  offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
>              }
>              reg = ((modrm >> 3) & 7) | rex_r;
> -            gen_op_mov_reg_v(ot, reg, s->T0);
> +            gen_op_mov_reg_v(s, ot, reg, s->T0);
>              break;
>          case 0x1d6: /* movq ea, xmm */
>              if (mod != 3) {
> @@ -3787,7 +3787,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                                   s->T0, tcg_const_i32(8 << ot));
>
>                  ot = mo_64_32(s->dflag);
> -                gen_op_mov_reg_v(ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, ot, reg, s->T0);
>                  break;
>
>              case 0x1f0: /* crc32 or movbe */
> @@ -3814,7 +3814,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  if ((b & 1) == 0) {
>                      tcg_gen_qemu_ld_tl(s->T0, s->A0,
>                                         s->mem_index, ot | MO_BE);
> -                    gen_op_mov_reg_v(ot, reg, s->T0);
> +                    gen_op_mov_reg_v(s, ot, reg, s->T0);
>                  } else {
>                      tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
>                                         s->mem_index, ot | MO_BE);
> @@ -3830,7 +3830,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  ot = mo_64_32(s->dflag);
>                  gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
>                  tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
> -                gen_op_mov_reg_v(ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, ot, reg, s->T0);
>                  gen_op_update1_cc(s);
>                  set_cc_op(s, CC_OP_LOGICB + ot);
>                  break;
> @@ -3868,7 +3868,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      tcg_gen_subi_tl(s->T1, s->T1, 1);
>                      tcg_gen_and_tl(s->T0, s->T0, s->T1);
>
> -                    gen_op_mov_reg_v(ot, reg, s->T0);
> +                    gen_op_mov_reg_v(s, ot, reg, s->T0);
>                      gen_op_update1_cc(s);
>                      set_cc_op(s, CC_OP_LOGICB + ot);
>                  }
> @@ -3896,7 +3896,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                  tcg_gen_movi_tl(s->A0, -1);
>                  tcg_gen_shl_tl(s->A0, s->A0, s->T1);
>                  tcg_gen_andc_tl(s->T0, s->T0, s->A0);
> -                gen_op_mov_reg_v(ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, ot, reg, s->T0);
>                  gen_op_update1_cc(s);
>                  set_cc_op(s, CC_OP_BMILGB + ot);
>                  break;
> @@ -4071,7 +4071,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      }
>                      tcg_gen_shr_tl(s->T0, s->T0, s->T1);
>                  }
> -                gen_op_mov_reg_v(ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, ot, reg, s->T0);
>                  break;
>
>              case 0x0f3:
> @@ -4104,7 +4104,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      goto unknown_op;
>                  }
>                  tcg_gen_mov_tl(cpu_cc_dst, s->T0);
> -                gen_op_mov_reg_v(ot, s->vex_v, s->T0);
> +                gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
>                  set_cc_op(s, CC_OP_BMILGB + ot);
>                  break;
>
> @@ -4145,7 +4145,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
>                                              xmm_regs[reg].ZMM_B(val & 15)));
>                      if (mod == 3) {
> -                        gen_op_mov_reg_v(ot, rm, s->T0);
> +                        gen_op_mov_reg_v(s, ot, rm, s->T0);
>                      } else {
>                          tcg_gen_qemu_st_tl(s->T0, s->A0,
>                                             s->mem_index, MO_UB);
> @@ -4155,7 +4155,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
>                                              xmm_regs[reg].ZMM_W(val & 7)));
>                      if (mod == 3) {
> -                        gen_op_mov_reg_v(ot, rm, s->T0);
> +                        gen_op_mov_reg_v(s, ot, rm, s->T0);
>                      } else {
>                          tcg_gen_qemu_st_tl(s->T0, s->A0,
>                                             s->mem_index, MO_LEUW);
> @@ -4192,7 +4192,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
>                                              xmm_regs[reg].ZMM_L(val & 3)));
>                      if (mod == 3) {
> -                        gen_op_mov_reg_v(ot, rm, s->T0);
> +                        gen_op_mov_reg_v(s, ot, rm, s->T0);
>                      } else {
>                          tcg_gen_qemu_st_tl(s->T0, s->A0,
>                                             s->mem_index, MO_LEUL);
> @@ -4200,7 +4200,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      break;
>                  case 0x20: /* pinsrb */
>                      if (mod == 3) {
> -                        gen_op_mov_v_reg(MO_32, s->T0, rm);
> +                        gen_op_mov_v_reg(s, MO_32, s->T0, rm);
>                      } else {
>                          tcg_gen_qemu_ld_tl(s->T0, s->A0,
>                                             s->mem_index, MO_UB);
> @@ -4251,7 +4251,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      } else { /* pinsrq */
>  #ifdef TARGET_X86_64
>                          if (mod == 3) {
> -                            gen_op_mov_v_reg(ot, s->tmp1_i64, rm);
> +                            gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
>                          } else {
>                              tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
>                                                  s->mem_index, MO_LEQ);
> @@ -4326,7 +4326,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
>                      tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
>                      tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
>                  }
> -                gen_op_mov_reg_v(ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, ot, reg, s->T0);
>                  break;
>
>              default:
> @@ -4489,7 +4489,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>  #ifdef TARGET_X86_64
>      s->rex_x = 0;
>      s->rex_b = 0;
> -    x86_64_hregs = 0;
> +    s->x86_64_hregs = false;
>  #endif
>      s->rip_offset = 0; /* for relative ip address */
>      s->vex_l = 0;
> @@ -4548,7 +4548,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              rex_r = (b & 0x4) << 1;
>              s->rex_x = (b & 0x2) << 2;
>              REX_B(s) = (b & 0x1) << 3;
> -            x86_64_hregs = 1; /* select uniform byte register addressing */
> +            /* select uniform byte register addressing */
> +            s->x86_64_hregs = true;
>              goto next_byte;
>          }
>          break;
> @@ -4576,7 +4577,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  goto illegal_op;
>              }
>  #ifdef TARGET_X86_64
> -            if (x86_64_hregs) {
> +            if (s->x86_64_hregs) {
>                  goto illegal_op;
>              }
>  #endif
> @@ -4681,12 +4682,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      /* xor reg, reg optimisation */
>                      set_cc_op(s, CC_OP_CLR);
>                      tcg_gen_movi_tl(s->T0, 0);
> -                    gen_op_mov_reg_v(ot, reg, s->T0);
> +                    gen_op_mov_reg_v(s, ot, reg, s->T0);
>                      break;
>                  } else {
>                      opreg = rm;
>                  }
> -                gen_op_mov_v_reg(ot, s->T1, reg);
> +                gen_op_mov_v_reg(s, ot, s->T1, reg);
>                  gen_op(s, op, ot, opreg);
>                  break;
>              case 1: /* OP Gv, Ev */
> @@ -4700,7 +4701,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  } else if (op == OP_XORL && rm == reg) {
>                      goto xor_zero;
>                  } else {
> -                    gen_op_mov_v_reg(ot, s->T1, rm);
> +                    gen_op_mov_v_reg(s, ot, s->T1, rm);
>                  }
>                  gen_op(s, op, ot, reg);
>                  break;
> @@ -4786,7 +4787,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  gen_op_ld_v(s, ot, s->T0, s->A0);
>              }
>          } else {
> -            gen_op_mov_v_reg(ot, s->T0, rm);
> +            gen_op_mov_v_reg(s, ot, s->T0, rm);
>          }
>
>          switch(op) {
> @@ -4809,7 +4810,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  if (mod != 3) {
>                      gen_op_st_v(s, ot, s->T0, s->A0);
>                  } else {
> -                    gen_op_mov_reg_v(ot, rm, s->T0);
> +                    gen_op_mov_reg_v(s, ot, rm, s->T0);
>                  }
>              }
>              break;
> @@ -4847,7 +4848,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  if (mod != 3) {
>                      gen_op_st_v(s, ot, s->T0, s->A0);
>                  } else {
> -                    gen_op_mov_reg_v(ot, rm, s->T0);
> +                    gen_op_mov_reg_v(s, ot, rm, s->T0);
>                  }
>              }
>              gen_op_update_neg_cc(s);
> @@ -4856,26 +4857,26 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          case 4: /* mul */
>              switch(ot) {
>              case MO_8:
> -                gen_op_mov_v_reg(MO_8, s->T1, R_EAX);
> +                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
>                  tcg_gen_ext8u_tl(s->T0, s->T0);
>                  tcg_gen_ext8u_tl(s->T1, s->T1);
>                  /* XXX: use 32 bit mul which could be faster */
>                  tcg_gen_mul_tl(s->T0, s->T0, s->T1);
> -                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
>                  tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>                  tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
>                  set_cc_op(s, CC_OP_MULB);
>                  break;
>              case MO_16:
> -                gen_op_mov_v_reg(MO_16, s->T1, R_EAX);
> +                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
>                  tcg_gen_ext16u_tl(s->T0, s->T0);
>                  tcg_gen_ext16u_tl(s->T1, s->T1);
>                  /* XXX: use 32 bit mul which could be faster */
>                  tcg_gen_mul_tl(s->T0, s->T0, s->T1);
> -                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
>                  tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>                  tcg_gen_shri_tl(s->T0, s->T0, 16);
> -                gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
> +                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
>                  tcg_gen_mov_tl(cpu_cc_src, s->T0);
>                  set_cc_op(s, CC_OP_MULW);
>                  break;
> @@ -4905,29 +4906,29 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          case 5: /* imul */
>              switch(ot) {
>              case MO_8:
> -                gen_op_mov_v_reg(MO_8, s->T1, R_EAX);
> +                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
>                  tcg_gen_ext8s_tl(s->T0, s->T0);
>                  tcg_gen_ext8s_tl(s->T1, s->T1);
>                  /* XXX: use 32 bit mul which could be faster */
>                  tcg_gen_mul_tl(s->T0, s->T0, s->T1);
> -                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
>                  tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>                  tcg_gen_ext8s_tl(s->tmp0, s->T0);
>                  tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
>                  set_cc_op(s, CC_OP_MULB);
>                  break;
>              case MO_16:
> -                gen_op_mov_v_reg(MO_16, s->T1, R_EAX);
> +                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
>                  tcg_gen_ext16s_tl(s->T0, s->T0);
>                  tcg_gen_ext16s_tl(s->T1, s->T1);
>                  /* XXX: use 32 bit mul which could be faster */
>                  tcg_gen_mul_tl(s->T0, s->T0, s->T1);
> -                gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
>                  tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>                  tcg_gen_ext16s_tl(s->tmp0, s->T0);
>                  tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
>                  tcg_gen_shri_tl(s->T0, s->T0, 16);
> -                gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
> +                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
>                  set_cc_op(s, CC_OP_MULW);
>                  break;
>              default:
> @@ -5026,7 +5027,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              if (op >= 2 && op != 3 && op != 5)
>                  gen_op_ld_v(s, ot, s->T0, s->A0);
>          } else {
> -            gen_op_mov_v_reg(ot, s->T0, rm);
> +            gen_op_mov_v_reg(s, ot, s->T0, rm);
>          }
>
>          switch(op) {
> @@ -5115,7 +5116,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          reg = ((modrm >> 3) & 7) | rex_r;
>
>          gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> -        gen_op_mov_v_reg(ot, s->T1, reg);
> +        gen_op_mov_v_reg(s, ot, s->T1, reg);
>          gen_op_testl_T0_T1_cc(s);
>          set_cc_op(s, CC_OP_LOGICB + ot);
>          break;
> @@ -5125,7 +5126,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          ot = mo_b_d(b, dflag);
>          val = insn_get(env, s, ot);
>
> -        gen_op_mov_v_reg(ot, s->T0, OR_EAX);
> +        gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
>          tcg_gen_movi_tl(s->T1, val);
>          gen_op_testl_T0_T1_cc(s);
>          set_cc_op(s, CC_OP_LOGICB + ot);
> @@ -5135,20 +5136,20 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          switch (dflag) {
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
> +            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
>              tcg_gen_ext32s_tl(s->T0, s->T0);
> -            gen_op_mov_reg_v(MO_64, R_EAX, s->T0);
> +            gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
>              break;
>  #endif
>          case MO_32:
> -            gen_op_mov_v_reg(MO_16, s->T0, R_EAX);
> +            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
>              tcg_gen_ext16s_tl(s->T0, s->T0);
> -            gen_op_mov_reg_v(MO_32, R_EAX, s->T0);
> +            gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
>              break;
>          case MO_16:
> -            gen_op_mov_v_reg(MO_8, s->T0, R_EAX);
> +            gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
>              tcg_gen_ext8s_tl(s->T0, s->T0);
> -            gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +            gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
>              break;
>          default:
>              tcg_abort();
> @@ -5158,22 +5159,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          switch (dflag) {
>  #ifdef TARGET_X86_64
>          case MO_64:
> -            gen_op_mov_v_reg(MO_64, s->T0, R_EAX);
> +            gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
>              tcg_gen_sari_tl(s->T0, s->T0, 63);
> -            gen_op_mov_reg_v(MO_64, R_EDX, s->T0);
> +            gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
>              break;
>  #endif
>          case MO_32:
> -            gen_op_mov_v_reg(MO_32, s->T0, R_EAX);
> +            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
>              tcg_gen_ext32s_tl(s->T0, s->T0);
>              tcg_gen_sari_tl(s->T0, s->T0, 31);
> -            gen_op_mov_reg_v(MO_32, R_EDX, s->T0);
> +            gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
>              break;
>          case MO_16:
> -            gen_op_mov_v_reg(MO_16, s->T0, R_EAX);
> +            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
>              tcg_gen_ext16s_tl(s->T0, s->T0);
>              tcg_gen_sari_tl(s->T0, s->T0, 15);
> -            gen_op_mov_reg_v(MO_16, R_EDX, s->T0);
> +            gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
>              break;
>          default:
>              tcg_abort();
> @@ -5197,7 +5198,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              val = (int8_t)insn_get(env, s, MO_8);
>              tcg_gen_movi_tl(s->T1, val);
>          } else {
> -            gen_op_mov_v_reg(ot, s->T1, reg);
> +            gen_op_mov_v_reg(s, ot, s->T1, reg);
>          }
>          switch (ot) {
>  #ifdef TARGET_X86_64
> @@ -5227,7 +5228,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              tcg_gen_mov_tl(cpu_cc_dst, s->T0);
>              tcg_gen_ext16s_tl(s->tmp0, s->T0);
>              tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
> -            gen_op_mov_reg_v(ot, reg, s->T0);
> +            gen_op_mov_reg_v(s, ot, reg, s->T0);
>              break;
>          }
>          set_cc_op(s, CC_OP_MULB + ot);
> @@ -5238,13 +5239,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          modrm = x86_ldub_code(env, s);
>          reg = ((modrm >> 3) & 7) | rex_r;
>          mod = (modrm >> 6) & 3;
> -        gen_op_mov_v_reg(ot, s->T0, reg);
> +        gen_op_mov_v_reg(s, ot, s->T0, reg);
>          if (mod == 3) {
>              rm = (modrm & 7) | REX_B(s);
> -            gen_op_mov_v_reg(ot, s->T1, rm);
> +            gen_op_mov_v_reg(s, ot, s->T1, rm);
>              tcg_gen_add_tl(s->T0, s->T0, s->T1);
> -            gen_op_mov_reg_v(ot, reg, s->T1);
> -            gen_op_mov_reg_v(ot, rm, s->T0);
> +            gen_op_mov_reg_v(s, ot, reg, s->T1);
> +            gen_op_mov_reg_v(s, ot, rm, s->T0);
>          } else {
>              gen_lea_modrm(env, s, modrm);
>              if (s->prefix & PREFIX_LOCK) {
> @@ -5256,7 +5257,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  tcg_gen_add_tl(s->T0, s->T0, s->T1);
>                  gen_op_st_v(s, ot, s->T0, s->A0);
>              }
> -            gen_op_mov_reg_v(ot, reg, s->T1);
> +            gen_op_mov_reg_v(s, ot, reg, s->T1);
>          }
>          gen_op_update2_cc(s);
>          set_cc_op(s, CC_OP_ADDB + ot);
> @@ -5273,7 +5274,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              oldv = tcg_temp_new();
>              newv = tcg_temp_new();
>              cmpv = tcg_temp_new();
> -            gen_op_mov_v_reg(ot, newv, reg);
> +            gen_op_mov_v_reg(s, ot, newv, reg);
>              tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
>
>              if (s->prefix & PREFIX_LOCK) {
> @@ -5283,11 +5284,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  gen_lea_modrm(env, s, modrm);
>                  tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
>                                            s->mem_index, ot | MO_LE);
> -                gen_op_mov_reg_v(ot, R_EAX, oldv);
> +                gen_op_mov_reg_v(s, ot, R_EAX, oldv);
>              } else {
>                  if (mod == 3) {
>                      rm = (modrm & 7) | REX_B(s);
> -                    gen_op_mov_v_reg(ot, oldv, rm);
> +                    gen_op_mov_v_reg(s, ot, oldv, rm);
>                  } else {
>                      gen_lea_modrm(env, s, modrm);
>                      gen_op_ld_v(s, ot, oldv, s->A0);
> @@ -5298,15 +5299,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  /* store value = (old == cmp ? new : old);  */
>                  tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
>                  if (mod == 3) {
> -                    gen_op_mov_reg_v(ot, R_EAX, oldv);
> -                    gen_op_mov_reg_v(ot, rm, newv);
> +                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
> +                    gen_op_mov_reg_v(s, ot, rm, newv);
>                  } else {
>                      /* Perform an unconditional store cycle like physical cpu;
>                         must be before changing accumulator to ensure
>                         idempotency if the store faults and the instruction
>                         is restarted */
>                      gen_op_st_v(s, ot, newv, s->A0);
> -                    gen_op_mov_reg_v(ot, R_EAX, oldv);
> +                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
>                  }
>              }
>              tcg_gen_mov_tl(cpu_cc_src, oldv);
> @@ -5351,14 +5352,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          /**************************/
>          /* push/pop */
>      case 0x50 ... 0x57: /* push */
> -        gen_op_mov_v_reg(MO_32, s->T0, (b & 7) | REX_B(s));
> +        gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
>          gen_push_v(s, s->T0);
>          break;
>      case 0x58 ... 0x5f: /* pop */
>          ot = gen_pop_T0(s);
>          /* NOTE: order is important for pop %sp */
>          gen_pop_update(s, ot);
> -        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), s->T0);
> +        gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
>          break;
>      case 0x60: /* pusha */
>          if (CODE64(s))
> @@ -5388,7 +5389,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              /* NOTE: order is important for pop %sp */
>              gen_pop_update(s, ot);
>              rm = (modrm & 7) | REX_B(s);
> -            gen_op_mov_reg_v(ot, rm, s->T0);
> +            gen_op_mov_reg_v(s, ot, rm, s->T0);
>          } else {
>              /* NOTE: order is important too for MMU exceptions */
>              s->popl_esp_hack = 1 << ot;
> @@ -5478,7 +5479,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          if (mod != 3) {
>              gen_op_st_v(s, ot, s->T0, s->A0);
>          } else {
> -            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), s->T0);
> +            gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
>          }
>          break;
>      case 0x8a:
> @@ -5488,7 +5489,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          reg = ((modrm >> 3) & 7) | rex_r;
>
>          gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> -        gen_op_mov_reg_v(ot, reg, s->T0);
> +        gen_op_mov_reg_v(s, ot, reg, s->T0);
>          break;
>      case 0x8e: /* mov seg, Gv */
>          modrm = x86_ldub_code(env, s);
> @@ -5540,10 +5541,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              rm = (modrm & 7) | REX_B(s);
>
>              if (mod == 3) {
> -                if (s_ot == MO_SB && byte_reg_is_xH(rm)) {
> +                if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
>                      tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
>                  } else {
> -                    gen_op_mov_v_reg(ot, s->T0, rm);
> +                    gen_op_mov_v_reg(s, ot, s->T0, rm);
>                      switch (s_ot) {
>                      case MO_UB:
>                          tcg_gen_ext8u_tl(s->T0, s->T0);
> @@ -5560,11 +5561,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                          break;
>                      }
>                  }
> -                gen_op_mov_reg_v(d_ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
>              } else {
>                  gen_lea_modrm(env, s, modrm);
>                  gen_op_ld_v(s, s_ot, s->T0, s->A0);
> -                gen_op_mov_reg_v(d_ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
>              }
>          }
>          break;
> @@ -5579,7 +5580,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              AddressParts a = gen_lea_modrm_0(env, s, modrm);
>              TCGv ea = gen_lea_modrm_1(s, a);
>              gen_lea_v_seg(s, s->aflag, ea, -1, -1);
> -            gen_op_mov_reg_v(dflag, reg, s->A0);
> +            gen_op_mov_reg_v(s, dflag, reg, s->A0);
>          }
>          break;
>
> @@ -5605,9 +5606,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              gen_add_A0_ds_seg(s);
>              if ((b & 2) == 0) {
>                  gen_op_ld_v(s, ot, s->T0, s->A0);
> -                gen_op_mov_reg_v(ot, R_EAX, s->T0);
> +                gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
>              } else {
> -                gen_op_mov_v_reg(ot, s->T0, R_EAX);
> +                gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
>                  gen_op_st_v(s, ot, s->T0, s->A0);
>              }
>          }
> @@ -5619,12 +5620,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          gen_extu(s->aflag, s->A0);
>          gen_add_A0_ds_seg(s);
>          gen_op_ld_v(s, MO_8, s->T0, s->A0);
> -        gen_op_mov_reg_v(MO_8, R_EAX, s->T0);
> +        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
>          break;
>      case 0xb0 ... 0xb7: /* mov R, Ib */
>          val = insn_get(env, s, MO_8);
>          tcg_gen_movi_tl(s->T0, val);
> -        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), s->T0);
> +        gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
>          break;
>      case 0xb8 ... 0xbf: /* mov R, Iv */
>  #ifdef TARGET_X86_64
> @@ -5634,7 +5635,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              tmp = x86_ldq_code(env, s);
>              reg = (b & 7) | REX_B(s);
>              tcg_gen_movi_tl(s->T0, tmp);
> -            gen_op_mov_reg_v(MO_64, reg, s->T0);
> +            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
>          } else
>  #endif
>          {
> @@ -5642,7 +5643,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              val = insn_get(env, s, ot);
>              reg = (b & 7) | REX_B(s);
>              tcg_gen_movi_tl(s->T0, val);
> -            gen_op_mov_reg_v(ot, reg, s->T0);
> +            gen_op_mov_reg_v(s, ot, reg, s->T0);
>          }
>          break;
>
> @@ -5661,17 +5662,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          if (mod == 3) {
>              rm = (modrm & 7) | REX_B(s);
>          do_xchg_reg:
> -            gen_op_mov_v_reg(ot, s->T0, reg);
> -            gen_op_mov_v_reg(ot, s->T1, rm);
> -            gen_op_mov_reg_v(ot, rm, s->T0);
> -            gen_op_mov_reg_v(ot, reg, s->T1);
> +            gen_op_mov_v_reg(s, ot, s->T0, reg);
> +            gen_op_mov_v_reg(s, ot, s->T1, rm);
> +            gen_op_mov_reg_v(s, ot, rm, s->T0);
> +            gen_op_mov_reg_v(s, ot, reg, s->T1);
>          } else {
>              gen_lea_modrm(env, s, modrm);
> -            gen_op_mov_v_reg(ot, s->T0, reg);
> +            gen_op_mov_v_reg(s, ot, s->T0, reg);
>              /* for xchg, lock is implicit */
>              tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
>                                     s->mem_index, ot | MO_LE);
> -            gen_op_mov_reg_v(ot, reg, s->T1);
> +            gen_op_mov_reg_v(s, ot, reg, s->T1);
>          }
>          break;
>      case 0xc4: /* les Gv */
> @@ -5704,7 +5705,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          gen_op_ld_v(s, MO_16, s->T0, s->A0);
>          gen_movl_seg_T0(s, op);
>          /* then put the data */
> -        gen_op_mov_reg_v(ot, reg, s->T1);
> +        gen_op_mov_reg_v(s, ot, reg, s->T1);
>          if (s->base.is_jmp) {
>              gen_jmp_im(s, s->pc - s->cs_base);
>              gen_eob(s);
> @@ -5783,7 +5784,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          } else {
>              opreg = rm;
>          }
> -        gen_op_mov_v_reg(ot, s->T1, reg);
> +        gen_op_mov_v_reg(s, ot, s->T1, reg);
>
>          if (shift) {
>              TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
> @@ -6244,7 +6245,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  case 0:
>                      gen_helper_fnstsw(s->tmp2_i32, cpu_env);
>                      tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
> -                    gen_op_mov_reg_v(MO_16, R_EAX, s->T0);
> +                    gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
>                      break;
>                  default:
>                      goto unknown_op;
> @@ -6397,7 +6398,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>  	}
>          tcg_gen_movi_i32(s->tmp2_i32, val);
>          gen_helper_in_func(ot, s->T1, s->tmp2_i32);
> -        gen_op_mov_reg_v(ot, R_EAX, s->T1);
> +        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
>          gen_bpt_io(s, s->tmp2_i32, ot);
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_end();
> @@ -6411,7 +6412,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          tcg_gen_movi_tl(s->T0, val);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       svm_is_rep(prefixes));
> -        gen_op_mov_v_reg(ot, s->T1, R_EAX);
> +        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
>
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_start();
> @@ -6436,7 +6437,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>  	}
>          tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
>          gen_helper_in_func(ot, s->T1, s->tmp2_i32);
> -        gen_op_mov_reg_v(ot, R_EAX, s->T1);
> +        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
>          gen_bpt_io(s, s->tmp2_i32, ot);
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_end();
> @@ -6449,7 +6450,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
>          gen_check_io(s, ot, pc_start - s->cs_base,
>                       svm_is_rep(prefixes));
> -        gen_op_mov_v_reg(ot, s->T1, R_EAX);
> +        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
>
>          if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>              gen_io_start();
> @@ -6708,7 +6709,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>      case 0x9e: /* sahf */
>          if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
>              goto illegal_op;
> -        gen_op_mov_v_reg(MO_8, s->T0, R_AH);
> +        gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
>          gen_compute_eflags(s);
>          tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
>          tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
> @@ -6720,7 +6721,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          gen_compute_eflags(s);
>          /* Note: gen_compute_eflags() only gives the condition codes */
>          tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
> -        gen_op_mov_reg_v(MO_8, R_AH, s->T0);
> +        gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
>          break;
>      case 0xf5: /* cmc */
>          gen_compute_eflags(s);
> @@ -6758,7 +6759,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  gen_op_ld_v(s, ot, s->T0, s->A0);
>              }
>          } else {
> -            gen_op_mov_v_reg(ot, s->T0, rm);
> +            gen_op_mov_v_reg(s, ot, s->T0, rm);
>          }
>          /* load shift */
>          val = x86_ldub_code(env, s);
> @@ -6784,7 +6785,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          reg = ((modrm >> 3) & 7) | rex_r;
>          mod = (modrm >> 6) & 3;
>          rm = (modrm & 7) | REX_B(s);
> -        gen_op_mov_v_reg(MO_32, s->T1, reg);
> +        gen_op_mov_v_reg(s, MO_32, s->T1, reg);
>          if (mod != 3) {
>              AddressParts a = gen_lea_modrm_0(env, s, modrm);
>              /* specific case: we need to add a displacement */
> @@ -6797,7 +6798,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  gen_op_ld_v(s, ot, s->T0, s->A0);
>              }
>          } else {
> -            gen_op_mov_v_reg(ot, s->T0, rm);
> +            gen_op_mov_v_reg(s, ot, s->T0, rm);
>          }
>      bt_op:
>          tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
> @@ -6847,7 +6848,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  if (mod != 3) {
>                      gen_op_st_v(s, ot, s->T0, s->A0);
>                  } else {
> -                    gen_op_mov_reg_v(ot, rm, s->T0);
> +                    gen_op_mov_reg_v(s, ot, rm, s->T0);
>                  }
>              }
>          }
> @@ -6930,7 +6931,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
>              }
>          }
> -        gen_op_mov_reg_v(ot, reg, s->T0);
> +        gen_op_mov_reg_v(s, ot, reg, s->T0);
>          break;
>          /************************/
>          /* bcd */
> @@ -7070,7 +7071,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          mod = (modrm >> 6) & 3;
>          if (mod == 3)
>              goto illegal_op;
> -        gen_op_mov_v_reg(ot, s->T0, reg);
> +        gen_op_mov_v_reg(s, ot, s->T0, reg);
>          gen_lea_modrm(env, s, modrm);
>          tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
>          if (ot == MO_16) {
> @@ -7083,16 +7084,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          reg = (b & 7) | REX_B(s);
>  #ifdef TARGET_X86_64
>          if (dflag == MO_64) {
> -            gen_op_mov_v_reg(MO_64, s->T0, reg);
> +            gen_op_mov_v_reg(s, MO_64, s->T0, reg);
>              tcg_gen_bswap64_i64(s->T0, s->T0);
> -            gen_op_mov_reg_v(MO_64, reg, s->T0);
> +            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
>          } else
>  #endif
>          {
> -            gen_op_mov_v_reg(MO_32, s->T0, reg);
> +            gen_op_mov_v_reg(s, MO_32, s->T0, reg);
>              tcg_gen_ext32u_tl(s->T0, s->T0);
>              tcg_gen_bswap32_tl(s->T0, s->T0);
> -            gen_op_mov_reg_v(MO_32, reg, s->T0);
> +            gen_op_mov_reg_v(s, MO_32, reg, s->T0);
>          }
>          break;
>      case 0xd6: /* salc */
> @@ -7100,7 +7101,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              goto illegal_op;
>          gen_compute_eflags_c(s, s->T0);
>          tcg_gen_neg_tl(s->T0, s->T0);
> -        gen_op_mov_reg_v(MO_8, R_EAX, s->T0);
> +        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
>          break;
>      case 0xe0: /* loopnz */
>      case 0xe1: /* loopz */
> @@ -7661,16 +7662,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              rm = (modrm & 7) | REX_B(s);
>
>              if (mod == 3) {
> -                gen_op_mov_v_reg(MO_32, s->T0, rm);
> +                gen_op_mov_v_reg(s, MO_32, s->T0, rm);
>                  /* sign extend */
>                  if (d_ot == MO_64) {
>                      tcg_gen_ext32s_tl(s->T0, s->T0);
>                  }
> -                gen_op_mov_reg_v(d_ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
>              } else {
>                  gen_lea_modrm(env, s, modrm);
>                  gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
> -                gen_op_mov_reg_v(d_ot, reg, s->T0);
> +                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
>              }
>          } else
>  #endif
> @@ -7694,10 +7695,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  a0 = tcg_temp_local_new();
>                  tcg_gen_mov_tl(a0, s->A0);
>              } else {
> -                gen_op_mov_v_reg(ot, t0, rm);
> +                gen_op_mov_v_reg(s, ot, t0, rm);
>                  a0 = NULL;
>              }
> -            gen_op_mov_v_reg(ot, t1, reg);
> +            gen_op_mov_v_reg(s, ot, t1, reg);
>              tcg_gen_andi_tl(s->tmp0, t0, 3);
>              tcg_gen_andi_tl(t1, t1, 3);
>              tcg_gen_movi_tl(t2, 0);
> @@ -7711,7 +7712,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  gen_op_st_v(s, ot, t0, a0);
>                  tcg_temp_free(a0);
>             } else {
> -                gen_op_mov_reg_v(ot, rm, t0);
> +                gen_op_mov_reg_v(s, ot, rm, t0);
>              }
>              gen_compute_eflags(s);
>              tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
> @@ -7742,7 +7743,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
>              label1 = gen_new_label();
>              tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
> -            gen_op_mov_reg_v(ot, reg, t0);
> +            gen_op_mov_reg_v(s, ot, reg, t0);
>              gen_set_label(label1);
>              set_cc_op(s, CC_OP_EFLAGS);
>              tcg_temp_free(t0);
> @@ -7996,7 +7997,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                      if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>                          gen_io_start();
>                      }
> -                    gen_op_mov_v_reg(ot, s->T0, rm);
> +                    gen_op_mov_v_reg(s, ot, s->T0, rm);
>                      gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
>                                           s->T0);
>                      if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
> @@ -8009,7 +8010,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                          gen_io_start();
>                      }
>                      gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg));
> -                    gen_op_mov_reg_v(ot, rm, s->T0);
> +                    gen_op_mov_reg_v(s, ot, rm, s->T0);
>                      if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
>                          gen_io_end();
>                      }
> @@ -8042,7 +8043,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>              }
>              if (b & 2) {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
> -                gen_op_mov_v_reg(ot, s->T0, rm);
> +                gen_op_mov_v_reg(s, ot, s->T0, rm);
>                  tcg_gen_movi_i32(s->tmp2_i32, reg);
>                  gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
>                  gen_jmp_im(s, s->pc - s->cs_base);
> @@ -8051,7 +8052,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
>                  tcg_gen_movi_i32(s->tmp2_i32, reg);
>                  gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
> -                gen_op_mov_reg_v(ot, rm, s->T0);
> +                gen_op_mov_reg_v(s, ot, rm, s->T0);
>              }
>          }
>          break;
> @@ -8313,7 +8314,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
>          gen_extu(ot, s->T0);
>          tcg_gen_mov_tl(cpu_cc_src, s->T0);
>          tcg_gen_ctpop_tl(s->T0, s->T0);
> -        gen_op_mov_reg_v(ot, reg, s->T0);
> +        gen_op_mov_reg_v(s, ot, reg, s->T0);
>
>          set_cc_op(s, CC_OP_POPCNT);
>          break;


--
Alex Bennée

^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2018-09-13 14:31 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-11 20:28 [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Emilio G. Cota
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 01/13] target/i386: move cpu_cc_srcT to DisasContext Emilio G. Cota
2018-09-11 20:44   ` Richard Henderson
2018-09-13 14:21   ` Alex Bennée
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 02/13] target/i386: move cpu_A0 " Emilio G. Cota
2018-09-11 20:45   ` Richard Henderson
2018-09-13 14:23   ` Alex Bennée
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 03/13] target/i386: move cpu_T0 " Emilio G. Cota
2018-09-11 20:47   ` Richard Henderson
2018-09-13 14:25   ` Alex Bennée
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 04/13] target/i386: move cpu_T1 " Emilio G. Cota
2018-09-11 20:48   ` Richard Henderson
2018-09-13 14:26   ` Alex Bennée
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 05/13] target/i386: move cpu_tmp0 " Emilio G. Cota
2018-09-11 20:51   ` Richard Henderson
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 06/13] target/i386: move cpu_tmp4 " Emilio G. Cota
2018-09-11 20:52   ` Richard Henderson
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 07/13] target/i386: move cpu_ptr0 " Emilio G. Cota
2018-09-11 20:53   ` Richard Henderson
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 08/13] target/i386: move cpu_ptr1 " Emilio G. Cota
2018-09-11 20:54   ` Richard Henderson
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 09/13] target/i386: move cpu_tmp2_i32 " Emilio G. Cota
2018-09-11 20:55   ` Richard Henderson
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 10/13] target/i386: move cpu_tmp3_i32 " Emilio G. Cota
2018-09-11 20:56   ` Richard Henderson
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 11/13] target/i386: move cpu_tmp1_i64 " Emilio G. Cota
2018-09-11 20:57   ` Richard Henderson
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 12/13] target/i386: move x86_64_hregs " Emilio G. Cota
2018-09-11 20:58   ` Richard Henderson
2018-09-13 14:31   ` Alex Bennée
2018-09-11 20:28 ` [Qemu-devel] [PATCH v3 13/13] configure: enable mttcg for i386 and x86_64 Emilio G. Cota
2018-09-12 12:46 ` [Qemu-devel] [PATCH v3 00/13] i386 + x86_64 mttcg Paolo Bonzini

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.