Subject: [Qemu-devel] [PATCH 5/6] softmmu templates: optionally pass CPUState to memory access functions
From: Blue Swirl <blauwirbel@gmail.com>
Date: 2012-01-07 22:26 UTC
To: qemu-devel

Optionally, make memory access helpers take a parameter for CPUState
instead of relying on global env.
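
As an illustration (not part of the patch), the net effect on the slow-path
helper signatures, quoted from the softmmu_defs.h hunk below (the byte
variants stand in for all access sizes):

    /* CONFIG_TCG_PASS_AREG0 unset: helpers reach the CPU via the global env */
    uint8_t REGPARM __ldb_mmu(target_ulong addr, int mmu_idx);
    void REGPARM __stb_mmu(target_ulong addr, uint8_t val, int mmu_idx);

    /* CONFIG_TCG_PASS_AREG0 set: CPUState becomes an explicit first argument */
    uint8_t REGPARM helper_ldb_mmu(CPUState *env, target_ulong addr, int mmu_idx);
    void REGPARM helper_stb_mmu(CPUState *env, target_ulong addr, uint8_t val, int mmu_idx);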

Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---
 cpu-all.h              |    9 +++
 exec-all.h             |    2 +
 exec.c                 |    4 ++
 softmmu_defs.h         |   20 +++++++
 softmmu_header.h       |   60 ++++++++++++++++-----
 softmmu_template.h     |   82 +++++++++++++++++++---------
 tcg/arm/tcg-target.c   |   53 ++++++++++++++++++
 tcg/hppa/tcg-target.c  |   44 +++++++++++++++
 tcg/i386/tcg-target.c  |  140 +++++++++++++++++++++++++++++++++++++++--------
 tcg/ia64/tcg-target.c  |   46 ++++++++++++++++
 tcg/mips/tcg-target.c  |   44 +++++++++++++++
 tcg/ppc/tcg-target.c   |   45 +++++++++++++++
 tcg/ppc64/tcg-target.c |   44 +++++++++++++++
 tcg/s390/tcg-target.c  |   44 +++++++++++++++
 tcg/sparc/tcg-target.c |   70 +++++++++++++++++++-----
 tcg/tci/tcg-target.c   |    6 ++
 16 files changed, 633 insertions(+), 80 deletions(-)
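
As a reading aid (not part of the patch): softmmu_header.h builds the accessor
names with glue() from the new CPU_PREFIX/HELPER_PREFIX and ENV_PARAM/ENV_VAR
macros. For DATA_SIZE == 1 and MEMSUFFIX == _code, the fast-path accessor and
its TLB-miss call expand roughly as:

    /* CONFIG_TCG_PASS_AREG0 set: CPU_PREFIX = cpu_, HELPER_PREFIX = helper_ */
    res = cpu_ldub_code(env, ptr);   /* TLB miss -> helper_ldb_mmu(env, addr, mmu_idx) */

    /* CONFIG_TCG_PASS_AREG0 unset: CPU_PREFIX empty, HELPER_PREFIX = __ */
    res = ldub_code(ptr);            /* TLB miss -> __ldb_mmu(addr, mmu_idx) */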

diff --git a/cpu-all.h b/cpu-all.h
index e2c3c49..53aa008 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -259,12 +259,21 @@ extern unsigned long reserved_va;
 #define stfl(p, v) stfl_raw(p, v)
 #define stfq(p, v) stfq_raw(p, v)

+#ifndef CONFIG_TCG_PASS_AREG0
 #define ldub_code(p) ldub_raw(p)
 #define ldsb_code(p) ldsb_raw(p)
 #define lduw_code(p) lduw_raw(p)
 #define ldsw_code(p) ldsw_raw(p)
 #define ldl_code(p) ldl_raw(p)
 #define ldq_code(p) ldq_raw(p)
+#else
+#define cpu_ldub_code(env1, p) ldub_raw(p)
+#define cpu_ldsb_code(env1, p) ldsb_raw(p)
+#define cpu_lduw_code(env1, p) lduw_raw(p)
+#define cpu_ldsw_code(env1, p) ldsw_raw(p)
+#define cpu_ldl_code(env1, p) ldl_raw(p)
+#define cpu_ldq_code(env1, p) ldq_raw(p)
+#endif

 #define ldub_kernel(p) ldub_raw(p)
 #define ldsb_kernel(p) ldsb_raw(p)
diff --git a/exec-all.h b/exec-all.h
index 51d01f2..88a57f7 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -311,7 +311,9 @@ void tlb_fill(CPUState *env1, target_ulong addr, int is_write, int mmu_idx,

 #define ACCESS_TYPE (NB_MMU_MODES + 1)
 #define MEMSUFFIX _code
+#ifndef CONFIG_TCG_PASS_AREG0
 #define env cpu_single_env
+#endif

 #define DATA_SIZE 1
 #include "softmmu_header.h"
diff --git a/exec.c b/exec.c
index b1d6602..fd4ba4e 100644
--- a/exec.c
+++ b/exec.c
@@ -4375,7 +4375,11 @@ tb_page_addr_t get_page_addr_code(CPUState *env1, target_ulong addr)
     mmu_idx = cpu_mmu_index(env1);
     if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
                  (addr & TARGET_PAGE_MASK))) {
+#ifdef CONFIG_TCG_PASS_AREG0
+        cpu_ldub_code(env1, addr);
+#else
         ldub_code(addr);
+#endif
     }
     pd = env1->tlb_table[mmu_idx][page_index].addr_code & ~TARGET_PAGE_MASK;
     if (pd != io_mem_ram.ram_addr && pd != io_mem_rom.ram_addr
diff --git a/softmmu_defs.h b/softmmu_defs.h
index c5a2bcd..ba949e3 100644
--- a/softmmu_defs.h
+++ b/softmmu_defs.h
@@ -9,6 +9,7 @@
 #ifndef SOFTMMU_DEFS_H
 #define SOFTMMU_DEFS_H

+#ifndef CONFIG_TCG_PASS_AREG0
 uint8_t REGPARM __ldb_mmu(target_ulong addr, int mmu_idx);
 void REGPARM __stb_mmu(target_ulong addr, uint8_t val, int mmu_idx);
 uint16_t REGPARM __ldw_mmu(target_ulong addr, int mmu_idx);
@@ -26,5 +27,24 @@ uint32_t REGPARM __ldl_cmmu(target_ulong addr, int mmu_idx);
 void REGPARM __stl_cmmu(target_ulong addr, uint32_t val, int mmu_idx);
 uint64_t REGPARM __ldq_cmmu(target_ulong addr, int mmu_idx);
 void REGPARM __stq_cmmu(target_ulong addr, uint64_t val, int mmu_idx);
+#else
+uint8_t REGPARM helper_ldb_mmu(CPUState *env, target_ulong addr, int mmu_idx);
+void REGPARM helper_stb_mmu(CPUState *env, target_ulong addr, uint8_t val, int mmu_idx);
+uint16_t REGPARM helper_ldw_mmu(CPUState *env, target_ulong addr, int mmu_idx);
+void REGPARM helper_stw_mmu(CPUState *env, target_ulong addr, uint16_t val, int mmu_idx);
+uint32_t REGPARM helper_ldl_mmu(CPUState *env, target_ulong addr, int mmu_idx);
+void REGPARM helper_stl_mmu(CPUState *env, target_ulong addr, uint32_t val, int mmu_idx);
+uint64_t REGPARM helper_ldq_mmu(CPUState *env, target_ulong addr, int mmu_idx);
+void REGPARM helper_stq_mmu(CPUState *env, target_ulong addr, uint64_t val, int mmu_idx);
+
+uint8_t REGPARM helper_ldb_cmmu(CPUState *env, target_ulong addr, int mmu_idx);
+void REGPARM helper_stb_cmmu(CPUState *env, target_ulong addr, uint8_t val, int mmu_idx);
+uint16_t REGPARM helper_ldw_cmmu(CPUState *env, target_ulong addr, int mmu_idx);
+void REGPARM helper_stw_cmmu(CPUState *env, target_ulong addr, uint16_t val, int mmu_idx);
+uint32_t REGPARM helper_ldl_cmmu(CPUState *env, target_ulong addr, int mmu_idx);
+void REGPARM helper_stl_cmmu(CPUState *env, target_ulong addr, uint32_t val, int mmu_idx);
+uint64_t REGPARM helper_ldq_cmmu(CPUState *env, target_ulong addr, int mmu_idx);
+void REGPARM helper_stq_cmmu(CPUState *env, target_ulong addr, uint64_t val, int mmu_idx);
+#endif

 #endif
diff --git a/softmmu_header.h b/softmmu_header.h
index 818d7b6..3c0b298 100644
--- a/softmmu_header.h
+++ b/softmmu_header.h
@@ -78,9 +78,23 @@
 #define ADDR_READ addr_read
 #endif

+#ifndef CONFIG_TCG_PASS_AREG0
+#define ENV_PARAM
+#define ENV_VAR
+#define CPU_PREFIX
+#define HELPER_PREFIX __
+#else
+#define ENV_PARAM CPUState *env,
+#define ENV_VAR env,
+#define CPU_PREFIX cpu_
+#define HELPER_PREFIX helper_
+#endif
+
 /* generic load/store macros */

-static inline RES_TYPE glue(glue(ld, USUFFIX), MEMSUFFIX)(target_ulong ptr)
+static inline RES_TYPE
+glue(glue(glue(CPU_PREFIX, ld), USUFFIX), MEMSUFFIX)(ENV_PARAM
+                                                     target_ulong ptr)
 {
     int page_index;
     RES_TYPE res;
@@ -93,7 +107,9 @@ static inline RES_TYPE glue(glue(ld, USUFFIX), MEMSUFFIX)(target_ulong ptr)
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        res = glue(glue(__ld, SUFFIX), MMUSUFFIX)(addr, mmu_idx);
+        res = glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_VAR
+                                                                     addr,
+                                                                     mmu_idx);
     } else {
         physaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)physaddr);
@@ -102,7 +118,9 @@ static inline RES_TYPE glue(glue(ld, USUFFIX), MEMSUFFIX)(target_ulong ptr)
 }

 #if DATA_SIZE <= 2
-static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
+static inline int
+glue(glue(glue(CPU_PREFIX, lds), SUFFIX), MEMSUFFIX)(ENV_PARAM
+                                                     target_ulong ptr)
 {
     int res, page_index;
     target_ulong addr;
@@ -114,7 +132,8 @@ static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        res = (DATA_STYPE)glue(glue(__ld, SUFFIX), MMUSUFFIX)(addr, mmu_idx);
+        res = (DATA_STYPE)glue(glue(glue(HELPER_PREFIX, ld), SUFFIX),
+                               MMUSUFFIX)(ENV_VAR addr, mmu_idx);
     } else {
         physaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         res = glue(glue(lds, SUFFIX), _raw)((uint8_t *)physaddr);
@@ -127,7 +146,9 @@ static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)

 /* generic store macro */

-static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v)
+static inline void
+glue(glue(glue(CPU_PREFIX, st), SUFFIX), MEMSUFFIX)(ENV_PARAM target_ulong ptr,
+                                                    RES_TYPE v)
 {
     int page_index;
     target_ulong addr;
@@ -139,7 +160,8 @@ static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        glue(glue(__st, SUFFIX), MMUSUFFIX)(addr, v, mmu_idx);
+        glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_VAR addr, v,
+                                                               mmu_idx);
     } else {
         physaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         glue(glue(st, SUFFIX), _raw)((uint8_t *)physaddr, v);
@@ -151,46 +173,52 @@ static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE
 #if ACCESS_TYPE != (NB_MMU_MODES + 1)

 #if DATA_SIZE == 8
-static inline float64 glue(ldfq, MEMSUFFIX)(target_ulong ptr)
+static inline float64 glue(glue(CPU_PREFIX, ldfq), MEMSUFFIX)(ENV_PARAM
+                                                              target_ulong ptr)
 {
     union {
         float64 d;
         uint64_t i;
     } u;
-    u.i = glue(ldq, MEMSUFFIX)(ptr);
+    u.i = glue(glue(CPU_PREFIX, ldq), MEMSUFFIX)(ENV_VAR ptr);
     return u.d;
 }

-static inline void glue(stfq, MEMSUFFIX)(target_ulong ptr, float64 v)
+static inline void glue(glue(CPU_PREFIX, stfq), MEMSUFFIX)(ENV_PARAM
+                                                           target_ulong ptr,
+                                                           float64 v)
 {
     union {
         float64 d;
         uint64_t i;
     } u;
     u.d = v;
-    glue(stq, MEMSUFFIX)(ptr, u.i);
+    glue(glue(CPU_PREFIX, stq), MEMSUFFIX)(ENV_VAR ptr, u.i);
 }
 #endif /* DATA_SIZE == 8 */

 #if DATA_SIZE == 4
-static inline float32 glue(ldfl, MEMSUFFIX)(target_ulong ptr)
+static inline float32 glue(glue(CPU_PREFIX, ldfl), MEMSUFFIX)(ENV_PARAM
+                                                              target_ulong ptr)
 {
     union {
         float32 f;
         uint32_t i;
     } u;
-    u.i = glue(ldl, MEMSUFFIX)(ptr);
+    u.i = glue(glue(CPU_PREFIX, ldl), MEMSUFFIX)(ENV_VAR ptr);
     return u.f;
 }

-static inline void glue(stfl, MEMSUFFIX)(target_ulong ptr, float32 v)
+static inline void glue(glue(CPU_PREFIX, stfl), MEMSUFFIX)(ENV_PARAM
+                                                           target_ulong ptr,
+                                                           float32 v)
 {
     union {
         float32 f;
         uint32_t i;
     } u;
     u.f = v;
-    glue(stl, MEMSUFFIX)(ptr, u.i);
+    glue(glue(CPU_PREFIX, stl), MEMSUFFIX)(ENV_VAR ptr, u.i);
 }
 #endif /* DATA_SIZE == 4 */

@@ -205,3 +233,7 @@ static inline void glue(stfl, MEMSUFFIX)(target_ulong ptr, float32 v)
 #undef CPU_MMU_INDEX
 #undef MMUSUFFIX
 #undef ADDR_READ
+#undef ENV_PARAM
+#undef ENV_VAR
+#undef CPU_PREFIX
+#undef HELPER_PREFIX
diff --git a/softmmu_template.h b/softmmu_template.h
index 97020f8..644604e 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -54,10 +54,24 @@
 #define ADDR_READ addr_read
 #endif

-static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
+#ifndef CONFIG_TCG_PASS_AREG0
+#define ENV_PARAM
+#define ENV_VAR
+#define CPU_PREFIX
+#define HELPER_PREFIX __
+#else
+#define ENV_PARAM CPUState *env,
+#define ENV_VAR env,
+#define CPU_PREFIX cpu_
+#define HELPER_PREFIX helper_
+#endif
+
+static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                        target_ulong addr,
                                                         int mmu_idx,
                                                         void *retaddr);
-static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
+static inline DATA_TYPE glue(io_read, SUFFIX)(ENV_PARAM
+                                              target_phys_addr_t physaddr,
                                               target_ulong addr,
                                               void *retaddr)
 {
@@ -89,8 +103,10 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
 }

 /* handle all cases except unaligned access which span two pages */
-DATA_TYPE REGPARM glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
-                                                      int mmu_idx)
+DATA_TYPE REGPARM
+glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                       target_ulong addr,
+                                                       int mmu_idx)
 {
     DATA_TYPE res;
     int index;
@@ -111,22 +127,22 @@ DATA_TYPE REGPARM glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
                 goto do_unaligned_access;
             retaddr = GETPC();
             ioaddr = env->iotlb[mmu_idx][index];
-            res = glue(io_read, SUFFIX)(ioaddr, addr, retaddr);
+            res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
             /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
-            do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+            do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
 #endif
-            res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr,
+            res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr,
                                                          mmu_idx, retaddr);
         } else {
             /* unaligned/aligned access in the same page */
 #ifdef ALIGNED_ONLY
             if ((addr & (DATA_SIZE - 1)) != 0) {
                 retaddr = GETPC();
-                do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+                do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
             }
 #endif
             addend = env->tlb_table[mmu_idx][index].addend;
@@ -137,7 +153,7 @@ DATA_TYPE REGPARM glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
         retaddr = GETPC();
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0)
-            do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+            do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
 #endif
         tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
         goto redo;
@@ -146,7 +162,8 @@ DATA_TYPE REGPARM glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
 }

 /* handle all unaligned cases */
-static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
+static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                        target_ulong addr,
                                                         int mmu_idx,
                                                         void *retaddr)
 {
@@ -165,15 +182,15 @@ static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
             ioaddr = env->iotlb[mmu_idx][index];
-            res = glue(io_read, SUFFIX)(ioaddr, addr, retaddr);
+            res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* slow unaligned access (it spans two pages) */
             addr1 = addr & ~(DATA_SIZE - 1);
             addr2 = addr1 + DATA_SIZE;
-            res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr1,
+            res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr1,
                                                           mmu_idx, retaddr);
-            res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
+            res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr2,
                                                           mmu_idx, retaddr);
             shift = (addr & (DATA_SIZE - 1)) * 8;
 #ifdef TARGET_WORDS_BIGENDIAN
@@ -197,12 +214,14 @@ static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,

 #ifndef SOFTMMU_CODE_ACCESS

-static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
+static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                   target_ulong addr,
                                                    DATA_TYPE val,
                                                    int mmu_idx,
                                                    void *retaddr);

-static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr,
+static inline void glue(io_write, SUFFIX)(ENV_PARAM
+                                          target_phys_addr_t physaddr,
                                           DATA_TYPE val,
                                           target_ulong addr,
                                           void *retaddr)
@@ -232,9 +251,11 @@ static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr,
 #endif /* SHIFT > 2 */
 }

-void REGPARM glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr,
-                                                 DATA_TYPE val,
-                                                 int mmu_idx)
+void REGPARM
+glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                       target_ulong addr,
+                                                       DATA_TYPE val,
+                                                       int mmu_idx)
 {
     target_phys_addr_t ioaddr;
     unsigned long addend;
@@ -252,21 +273,21 @@ void REGPARM glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr,
                 goto do_unaligned_access;
             retaddr = GETPC();
             ioaddr = env->iotlb[mmu_idx][index];
-            glue(io_write, SUFFIX)(ioaddr, val, addr, retaddr);
+            glue(io_write, SUFFIX)(ENV_VAR ioaddr, val, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
-            do_unaligned_access(addr, 1, mmu_idx, retaddr);
+            do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
 #endif
-            glue(glue(slow_st, SUFFIX), MMUSUFFIX)(addr, val,
+            glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_VAR addr, val,
                                                    mmu_idx, retaddr);
         } else {
             /* aligned/unaligned access in the same page */
 #ifdef ALIGNED_ONLY
             if ((addr & (DATA_SIZE - 1)) != 0) {
                 retaddr = GETPC();
-                do_unaligned_access(addr, 1, mmu_idx, retaddr);
+                do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
             }
 #endif
             addend = env->tlb_table[mmu_idx][index].addend;
@@ -277,7 +298,7 @@ void REGPARM glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr,
         retaddr = GETPC();
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0)
-            do_unaligned_access(addr, 1, mmu_idx, retaddr);
+            do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
 #endif
         tlb_fill(env, addr, 1, mmu_idx, retaddr);
         goto redo;
@@ -285,7 +306,8 @@ void REGPARM glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr,
 }

 /* handles all unaligned cases */
-static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
+static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                   target_ulong addr,
                                                    DATA_TYPE val,
                                                    int mmu_idx,
                                                    void *retaddr)
@@ -304,7 +326,7 @@ static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
             ioaddr = env->iotlb[mmu_idx][index];
-            glue(io_write, SUFFIX)(ioaddr, val, addr, retaddr);
+            glue(io_write, SUFFIX)(ENV_VAR ioaddr, val, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* XXX: not efficient, but simple */
@@ -312,10 +334,12 @@ static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
              * previous page from the TLB cache.  */
             for(i = DATA_SIZE - 1; i >= 0; i--) {
 #ifdef TARGET_WORDS_BIGENDIAN
-                glue(slow_stb, MMUSUFFIX)(addr + i, val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
+                glue(slow_stb, MMUSUFFIX)(ENV_VAR addr + i,
+                                          val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
                                           mmu_idx, retaddr);
 #else
-                glue(slow_stb, MMUSUFFIX)(addr + i, val >> (i * 8),
+                glue(slow_stb, MMUSUFFIX)(ENV_VAR addr + i,
+                                          val >> (i * 8),
                                           mmu_idx, retaddr);
 #endif
             }
@@ -340,3 +364,7 @@ static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
 #undef USUFFIX
 #undef DATA_SIZE
 #undef ADDR_READ
+#undef ENV_PARAM
+#undef ENV_VAR
+#undef CPU_PREFIX
+#undef HELPER_PREFIX
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 1d32798..e6565f6 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -929,6 +929,27 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index)

 #include "../../softmmu_defs.h"

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -936,6 +957,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };

+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -943,6 +966,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif

 #define TLB_SHIFT	(CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)

@@ -1075,6 +1099,19 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                     TCG_REG_R1, 0, addr_reg2, SHIFT_IMM_LSL(0));
     tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index);
 # endif
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 bit */
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[2], 0,
+                    tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[1], 0,
+                    tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0));
+
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[0], 0, TCG_AREG0,
+                    SHIFT_IMM_LSL(0));
+#endif
     tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]);

     switch (opc) {
@@ -1341,6 +1378,22 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     }
 # endif

+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 bit */
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[3], 0,
+                    tcg_target_call_iarg_regs[2], SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[2], 0,
+                    tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[1], 0,
+                    tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0));
+
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[0], 0, TCG_AREG0,
+                    SHIFT_IMM_LSL(0));
+#endif
     tcg_out_call(s, (tcg_target_long) qemu_st_helpers[s_bits]);
     if (opc == 3)
         tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R13, TCG_REG_R13, 0x10);
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index 59d4d12..683a496 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -882,6 +882,27 @@ static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret,
 #if defined(CONFIG_SOFTMMU)
 #include "../../softmmu_defs.h"

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -889,12 +910,15 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };

+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif

 /* Load and compare a TLB entry, and branch if TLB miss.  OFFSET is set to
    the offset of the first ADDR_READ or ADDR_WRITE member of the appropriate
@@ -1061,6 +1085,15 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     }
     tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);

+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call(s, qemu_ld_helpers[opc & 3]);

     switch (opc) {
@@ -1212,6 +1245,17 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
         tcg_abort();
     }

+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call(s, qemu_st_helpers[opc]);

     /* label2: */
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index c74bd1a..8b18734 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -61,6 +61,12 @@ static const int tcg_target_reg_alloc_order[] = {
 #endif
 };

+#ifdef CONFIG_TCG_PASS_AREG0
+#define ARG_OFFSET 1
+#else
+#define ARG_OFFSET 0
+#endif
+
 static const int tcg_target_call_iarg_regs[] = {
 #if TCG_TARGET_REG_BITS == 64
     TCG_REG_RDI,
@@ -188,10 +194,16 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
             tcg_regset_set32(ct->u.regs, 0, 0xffff);
             tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
             tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
+#ifdef CONFIG_TCG_PASS_AREG0
+            tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
+#endif
         } else {
             tcg_regset_set32(ct->u.regs, 0, 0xff);
             tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
             tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
+#ifdef CONFIG_TCG_PASS_AREG0
+            tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
+#endif
         }
         break;

@@ -967,6 +979,27 @@ static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)

 #include "../../softmmu_defs.h"

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void *qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void *qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -974,12 +1007,15 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };

+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif

 /* Perform the TLB load and compare.

@@ -1009,8 +1045,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
                                     uint8_t **label_ptr, int which)
 {
     const int addrlo = args[addrlo_idx];
-    const int r0 = tcg_target_call_iarg_regs[0];
-    const int r1 = tcg_target_call_iarg_regs[1];
+    const int r0 = tcg_target_call_iarg_regs[ARG_OFFSET + 0];
+    const int r1 = tcg_target_call_iarg_regs[ARG_OFFSET + 1];
     TCGType type = TCG_TYPE_I32;
     int rexw = 0;

@@ -1168,7 +1204,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,

     /* TLB Hit.  */
     tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
-                           tcg_target_call_iarg_regs[0], 0, opc);
+                           tcg_target_call_iarg_regs[ARG_OFFSET + 0], 0, opc);

     /* jmp label2 */
     tcg_out8(s, OPC_JMP_short);
@@ -1184,14 +1220,26 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     }

     /* XXX: move that code at the end of the TB */
-    /* The first argument is already loaded with addrlo.  */
+#ifdef CONFIG_TCG_PASS_AREG0
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+#endif
+    /* The second (or first for legacy helper) argument is already
+       loaded with addrlo.  */
     arg_idx = 1;
     if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
-        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
+        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[ARG_OFFSET + arg_idx++],
                     args[addrlo_idx + 1]);
+#if defined(CONFIG_TCG_PASS_AREG0) && (TCG_TARGET_REG_BITS == 32)
+        /* XXX push, later adjust stack etc. */
+        tcg_abort();
+#else
+        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[ARG_OFFSET + arg_idx],
+                     mem_index);
+#endif
+    } else {
+        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[ARG_OFFSET + arg_idx],
+                     mem_index);
     }
-    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
-                 mem_index);
     tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);

     switch(opc) {
@@ -1275,7 +1323,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
        we could perform the bswap twice to restore the original value
        instead of moving to the scratch.  But as it is, the L constraint
        means that the second argument reg is definitely free here.  */
-    int scratch = tcg_target_call_iarg_regs[1];
+    int scratch = tcg_target_call_iarg_regs[ARG_OFFSET + 1];

     switch (sizeop) {
     case 0:
@@ -1349,7 +1397,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,

     /* TLB Hit.  */
     tcg_out_qemu_st_direct(s, data_reg, data_reg2,
-                           tcg_target_call_iarg_regs[0], 0, opc);
+                           tcg_target_call_iarg_regs[ARG_OFFSET + 0], 0, opc);

     /* jmp label2 */
     tcg_out8(s, OPC_JMP_short);
@@ -1365,49 +1413,93 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     }

     /* XXX: move that code at the end of the TB */
+#ifdef CONFIG_TCG_PASS_AREG0
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+#endif
     if (TCG_TARGET_REG_BITS == 64) {
         tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
-                    tcg_target_call_iarg_regs[1], data_reg);
-        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
-                     mem_index);
+                    tcg_target_call_iarg_regs[ARG_OFFSET + 1], data_reg);
+#if defined(CONFIG_TCG_PASS_AREG0) && (TCG_TARGET_REG_BITS == 32)
+        tcg_out_pushi(s, mem_index);
+        stack_adjust = 4;
+#else
+        tcg_out_movi(s, TCG_TYPE_I32,
+                     tcg_target_call_iarg_regs[ARG_OFFSET + 2], mem_index);
         stack_adjust = 0;
+#endif
     } else if (TARGET_LONG_BITS == 32) {
-        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], data_reg);
+        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[ARG_OFFSET + 1],
+                    data_reg);
         if (opc == 3) {
-            tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
-                        data_reg2);
+#if defined(CONFIG_TCG_PASS_AREG0) && (TCG_TARGET_REG_BITS == 32)
+            tcg_out_pushi(s, data_reg2);
+            tcg_out_pushi(s, mem_index);
+            stack_adjust = 8;
+#else
+            tcg_out_mov(s, TCG_TYPE_I32,
+                        tcg_target_call_iarg_regs[ARG_OFFSET + 2], data_reg2);
             tcg_out_pushi(s, mem_index);
             stack_adjust = 4;
+#endif
         } else {
-            tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
-                         mem_index);
+#if defined(CONFIG_TCG_PASS_AREG0) && (TCG_TARGET_REG_BITS == 32)
+            tcg_out_pushi(s, mem_index);
+            stack_adjust = 4;
+#else
+            tcg_out_movi(s, TCG_TYPE_I32,
+                         tcg_target_call_iarg_regs[ARG_OFFSET + 2], mem_index);
             stack_adjust = 0;
+#endif
         }
     } else {
         if (opc == 3) {
-            tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1],
+            tcg_out_mov(s, TCG_TYPE_I32,
+                        tcg_target_call_iarg_regs[ARG_OFFSET + 1],
                         args[addrlo_idx + 1]);
             tcg_out_pushi(s, mem_index);
             tcg_out_push(s, data_reg2);
             tcg_out_push(s, data_reg);
             stack_adjust = 12;
         } else {
-            tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1],
-                        args[addrlo_idx + 1]);
             switch(opc) {
             case 0:
-                tcg_out_ext8u(s, tcg_target_call_iarg_regs[2], data_reg);
+#if defined(CONFIG_TCG_PASS_AREG0) && (TCG_TARGET_REG_BITS == 32)
+                tcg_out_ext8u(s, tcg_target_call_iarg_regs[ARG_OFFSET + 1],
+                              data_reg);
+                tcg_out_pushi(s, tcg_target_call_iarg_regs[ARG_OFFSET + 1]);
+                stack_adjust = 4;
+#else
+                tcg_out_ext8u(s, tcg_target_call_iarg_regs[ARG_OFFSET + 2],
+                              data_reg);
+#endif
                 break;
             case 1:
-                tcg_out_ext16u(s, tcg_target_call_iarg_regs[2], data_reg);
+#if defined(CONFIG_TCG_PASS_AREG0) && (TCG_TARGET_REG_BITS == 32)
+                tcg_out_ext16u(s, tcg_target_call_iarg_regs[ARG_OFFSET + 1],
+                               data_reg);
+                tcg_out_pushi(s, tcg_target_call_iarg_regs[ARG_OFFSET + 1]);
+                stack_adjust = 4;
+#else
+                tcg_out_ext16u(s, tcg_target_call_iarg_regs[ARG_OFFSET + 2],
+                               data_reg);
+#endif
                 break;
             case 2:
-                tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+#if defined(CONFIG_TCG_PASS_AREG0) && (TCG_TARGET_REG_BITS == 32)
+                tcg_out_pushi(s, data_reg);
+                stack_adjust = 4;
+#else
+                tcg_out_mov(s, TCG_TYPE_I32,
+                            tcg_target_call_iarg_regs[ARG_OFFSET + 2],
                             data_reg);
+#endif
                 break;
             }
+            tcg_out_mov(s, TCG_TYPE_I32,
+                        tcg_target_call_iarg_regs[ARG_OFFSET + 1],
+                        args[addrlo_idx + 1]);
             tcg_out_pushi(s, mem_index);
-            stack_adjust = 4;
+            stack_adjust += 4;
         }
     }

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index e3de79f..213e9d8 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1452,12 +1452,25 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
                                TCG_REG_P7, TCG_REG_R3, TCG_REG_R57));
 }

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
     __ldl_mmu,
     __ldq_mmu,
 };
+#endif

 static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 {
@@ -1517,6 +1530,15 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     }
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     if (!bswap || s_bits == 0) {
         tcg_out_bundle(s, miB,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
@@ -1547,12 +1569,25 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     }
 }

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif

 static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 {
@@ -1622,6 +1657,17 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
         data_reg = TCG_REG_R2;
     }

+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_bundle(s, miB,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
                                data_reg, TCG_REG_R3),
diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index c5c3282..3681141 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -750,6 +750,27 @@ static void tcg_out_setcond2(TCGContext *s, TCGCond cond, int ret,

 #include "../../softmmu_defs.h"

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -757,6 +778,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };

+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -764,6 +787,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif

 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                             int opc)
@@ -858,6 +882,15 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 # endif
     tcg_out_movi(s, TCG_TYPE_I32, sp_args++, mem_index);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T9, (tcg_target_long)qemu_ld_helpers[s_bits]);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 on 32 bit */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0);
     tcg_out_nop(s);

@@ -1069,6 +1102,17 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     }

     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T9, (tcg_target_long)qemu_st_helpers[s_bits]);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 on 32 bit */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0);
     tcg_out_nop(s);

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index f5d9bf3..d0702d6 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -508,6 +508,27 @@ static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg)

 #include "../../softmmu_defs.h"

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -515,6 +536,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };

+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -522,6 +545,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif

 static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
 {
@@ -598,6 +622,16 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
     tcg_out_movi (s, TCG_TYPE_I32, 5, mem_index);
 #endif

+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
+
     tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
     switch (opc) {
     case 0|4:
@@ -829,6 +863,17 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
     ir++;

     tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call (s, (tcg_target_long) qemu_st_helpers[opc], 1);
     label2_ptr = s->code_ptr;
     tcg_out32 (s, B);
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 4419378..d9fb409 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -552,6 +552,27 @@ static void tcg_out_ldsta (TCGContext *s, int ret, int addr,

 #include "../../softmmu_defs.h"

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -559,12 +580,15 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };

+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif

 static void tcg_out_tlb_read (TCGContext *s, int r0, int r1, int r2,
                               int addr_reg, int s_bits, int offset)
@@ -648,6 +672,15 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
     tcg_out_mov (s, TCG_TYPE_I64, 3, addr_reg);
     tcg_out_movi (s, TCG_TYPE_I64, 4, mem_index);

+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);

     switch (opc) {
@@ -796,6 +829,17 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
     tcg_out_rld (s, RLDICL, 4, data_reg, 0, 64 - (1 << (3 + opc)));
     tcg_out_movi (s, TCG_TYPE_I64, 5, mem_index);

+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call (s, (tcg_target_long) qemu_st_helpers[opc], 1);

     label2_ptr = s->code_ptr;
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 9317fe8..3695213 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -301,6 +301,27 @@ static const uint8_t tcg_cond_to_ltr_cond[10] = {

 #include "../../softmmu_defs.h"

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -308,6 +329,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };

+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -315,6 +338,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif

 static uint8_t *tb_ret_addr;

@@ -1483,9 +1507,29 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
             tcg_abort();
         }
         tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+        /* XXX/FIXME: suboptimal */
+        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                    tcg_target_call_iarg_regs[1]);
+        tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                    tcg_target_call_iarg_regs[0]);
+        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                    TCG_AREG0);
+#endif
         tgen_calli(s, (tcg_target_ulong)qemu_st_helpers[s_bits]);
     } else {
         tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+        /* XXX/FIXME: suboptimal */
+        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                    tcg_target_call_iarg_regs[2]);
+        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                    tcg_target_call_iarg_regs[1]);
+        tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                    tcg_target_call_iarg_regs[0]);
+        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                    TCG_AREG0);
+#endif
         tgen_calli(s, (tcg_target_ulong)qemu_ld_helpers[s_bits]);

         /* sign extension */
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 6f69e56..5a8f9d2 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -59,6 +59,12 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 };
 #endif

+#ifdef CONFIG_TCG_PASS_AREG0
+#define ARG_OFFSET 1
+#else
+#define ARG_OFFSET 0
+#endif
+
 static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_L0,
     TCG_REG_L1,
@@ -88,6 +94,7 @@ static const int tcg_target_call_oarg_regs[] = {
     TCG_REG_O0,
     TCG_REG_O1,
     TCG_REG_O2,
+    TCG_REG_O3,
 };

 static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
@@ -154,6 +161,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         tcg_regset_reset_reg(ct->u.regs, tcg_target_call_oarg_regs[0]);
         tcg_regset_reset_reg(ct->u.regs, tcg_target_call_oarg_regs[1]);
         tcg_regset_reset_reg(ct->u.regs, tcg_target_call_oarg_regs[2]);
+#ifdef CONFIG_TCG_PASS_AREG0
+        tcg_regset_reset_reg(ct->u.regs, tcg_target_call_oarg_regs[3]);
+#endif
         break;
     case 'I':
         ct->ct |= TCG_CT_CONST_S11;
@@ -705,6 +715,27 @@ static void tcg_target_qemu_prologue(TCGContext *s)

 #include "../../softmmu_defs.h"

+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -712,6 +743,8 @@ static const void * const qemu_ld_helpers[4] = {
     __ldq_mmu,
 };

+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static const void * const qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -719,6 +752,7 @@ static const void * const qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif

 #if TARGET_LONG_BITS == 32
 #define TARGET_LD_OP LDUW
@@ -759,9 +793,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     mem_index = *args;
     s_bits = opc & 3;

-    arg0 = tcg_target_call_iarg_regs[0];
-    arg1 = tcg_target_call_iarg_regs[1];
-    arg2 = tcg_target_call_iarg_regs[2];
+    arg0 = tcg_target_call_iarg_regs[ARG_OFFSET + 0];
+    arg1 = tcg_target_call_iarg_regs[ARG_OFFSET + 1];
+    arg2 = tcg_target_call_iarg_regs[ARG_OFFSET + 2];

 #if defined(CONFIG_SOFTMMU)
     /* srl addr_reg, x, arg1 */
@@ -796,10 +830,12 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     tcg_out32(s, 0);

     /* mov (delay slot) */
-    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], addr_reg);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[ARG_OFFSET + 0],
+                addr_reg);

     /* mov */
-    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], mem_index);
+    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[ARG_OFFSET + 1],
+                 mem_index);

     /* XXX: move that code at the end of the TB */
     /* qemu_ld_helper[s_bits](arg0, arg1) */
@@ -820,7 +856,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     switch(opc) {
     case 0 | 4:
         /* sll arg0, 24/56, data_reg */
-        tcg_out_arithi(s, data_reg, tcg_target_call_oarg_regs[0],
+        tcg_out_arithi(s, data_reg, tcg_target_call_oarg_regs[ARG_OFFSET + 0],
                        (int)sizeof(tcg_target_long) * 8 - 8, HOST_SLL_OP);
         /* sra data_reg, 24/56, data_reg */
         tcg_out_arithi(s, data_reg, data_reg,
@@ -828,7 +864,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         break;
     case 1 | 4:
         /* sll arg0, 16/48, data_reg */
-        tcg_out_arithi(s, data_reg, tcg_target_call_oarg_regs[0],
+        tcg_out_arithi(s, data_reg, tcg_target_call_oarg_regs[ARG_OFFSET + 0],
                        (int)sizeof(tcg_target_long) * 8 - 16, HOST_SLL_OP);
         /* sra data_reg, 16/48, data_reg */
         tcg_out_arithi(s, data_reg, data_reg,
@@ -836,7 +872,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         break;
     case 2 | 4:
         /* sll arg0, 32, data_reg */
-        tcg_out_arithi(s, data_reg, tcg_target_call_oarg_regs[0],
+        tcg_out_arithi(s, data_reg, tcg_target_call_oarg_regs[ARG_OFFSET + 0],
                        32, HOST_SLL_OP);
         /* sra data_reg, 32, data_reg */
         tcg_out_arithi(s, data_reg, data_reg, 32, HOST_SRA_OP);
@@ -847,7 +883,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     case 3:
     default:
         /* mov */
-        tcg_out_mov(s, TCG_TYPE_REG, data_reg, tcg_target_call_oarg_regs[0]);
+        tcg_out_mov(s, TCG_TYPE_REG, data_reg,
+                    tcg_target_call_oarg_regs[ARG_OFFSET + 0]);
         break;
     }

@@ -971,9 +1008,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,

     s_bits = opc;

-    arg0 = tcg_target_call_iarg_regs[0];
-    arg1 = tcg_target_call_iarg_regs[1];
-    arg2 = tcg_target_call_iarg_regs[2];
+    arg0 = tcg_target_call_iarg_regs[ARG_OFFSET + 0];
+    arg1 = tcg_target_call_iarg_regs[ARG_OFFSET + 1];
+    arg2 = tcg_target_call_iarg_regs[ARG_OFFSET + 2];

 #if defined(CONFIG_SOFTMMU)
     /* srl addr_reg, x, arg1 */
@@ -1009,13 +1046,16 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     tcg_out32(s, 0);

     /* mov (delay slot) */
-    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_oarg_regs[0], addr_reg);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_oarg_regs[ARG_OFFSET + 0],
+                addr_reg);

     /* mov */
-    tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_oarg_regs[1], data_reg);
+    tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_oarg_regs[ARG_OFFSET + 1],
+                data_reg);

     /* mov */
-    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_oarg_regs[2], mem_index);
+    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_oarg_regs[ARG_OFFSET + 2],
+                 mem_index);

     /* XXX: move that code at the end of the TB */
     /* qemu_st_helper[s_bits](arg0, arg1, arg2) */
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index fc0880c..72c83c9 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -798,6 +798,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_qemu_st8:
     case INDEX_op_qemu_st16:
     case INDEX_op_qemu_st32:
+#ifdef CONFIG_TCG_PASS_AREG0
+        tcg_out_r(s, TCG_AREG0);
+#endif
         tcg_out_r(s, *args++);
         tcg_out_r(s, *args++);
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
@@ -808,6 +811,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 #endif
         break;
     case INDEX_op_qemu_st64:
+#ifdef CONFIG_TCG_PASS_AREG0
+        tcg_out_r(s, TCG_AREG0);
+#endif
         tcg_out_r(s, *args++);
 #if TCG_TARGET_REG_BITS == 32
         tcg_out_r(s, *args++);
-- 
1.6.2.4

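For readers who want the core trick in isolation: below is a minimal,
self-contained C sketch (illustration only, not part of the patch) of how
the glue()/HELPER_PREFIX/ENV_PARAM machinery in softmmu_header.h and
softmmu_template.h selects between the legacy __ldl_mmu(addr, mmu_idx)
signature and the new helper_ldl_mmu(env, addr, mmu_idx) one. The CPUState
stub and the function body are stand-ins; only the token pasting mirrors
the patch.

/* Build with:  cc demo.c                        -> __ldl_mmu(addr, mmu_idx)
 *        or:   cc -DCONFIG_TCG_PASS_AREG0 demo.c
 *                                   -> helper_ldl_mmu(env, addr, mmu_idx) */
#include <stdio.h>

#define xglue(x, y) x ## y
#define glue(x, y) xglue(x, y)

typedef unsigned long target_ulong;
typedef struct CPUState { int dummy; } CPUState;   /* stand-in type */

#ifdef CONFIG_TCG_PASS_AREG0
#define ENV_PARAM CPUState *env,
#define ENV_VAR env,
#define HELPER_PREFIX helper_
#else
#define ENV_PARAM
#define ENV_VAR
#define HELPER_PREFIX __
#endif

#define SUFFIX l
#define MMUSUFFIX _mmu

/* Expands to __ldl_mmu() or helper_ldl_mmu() depending on the config;
   the body is a placeholder for the real TLB lookup. */
static unsigned glue(glue(glue(HELPER_PREFIX, ld), SUFFIX),
                     MMUSUFFIX)(ENV_PARAM target_ulong addr, int mmu_idx)
{
    return (unsigned)addr + (unsigned)mmu_idx;
}

int main(void)
{
    CPUState cpu = { 0 };
    CPUState *env = &cpu;

    (void)env;  /* only consumed via ENV_VAR in the AREG0 configuration */
    printf("%u\n", glue(glue(glue(HELPER_PREFIX, ld), SUFFIX),
                        MMUSUFFIX)(ENV_VAR 0x1000, 1));
    return 0;
}

On the TCG side the same one-slot shift shows up as the ARG_OFFSET define
in tcg/i386 and tcg/sparc: addr and mmu_idx move up by one call register so
that slot 0 can carry env (TCG_AREG0).
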
[-- Attachment #2: 0005-softmmu-templates-optionally-pass-CPUState-to-memory.patch --]
[-- Type: text/x-diff, Size: 58719 bytes --]

From 33b61308b0d6fb656924d2144b928e679b4fadef Mon Sep 17 00:00:00 2001
Message-Id: <33b61308b0d6fb656924d2144b928e679b4fadef.1325974684.git.blauwirbel@gmail.com>
In-Reply-To: <9773a03cec6b6c0dc3832087ca7ccb46bab7a784.1325974684.git.blauwirbel@gmail.com>
References: <9773a03cec6b6c0dc3832087ca7ccb46bab7a784.1325974684.git.blauwirbel@gmail.com>
From: Blue Swirl <blauwirbel@gmail.com>
Date: Sun, 18 Sep 2011 14:55:46 +0000
Subject: [PATCH 5/6] softmmu templates: optionally pass CPUState to memory access functions

Optionally, make memory access helpers take a parameter for CPUState
instead of relying on global env.

Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---
 cpu-all.h              |    9 +++
 exec-all.h             |    2 +
 exec.c                 |    4 ++
 softmmu_defs.h         |   20 +++++++
 softmmu_header.h       |   60 +++++++++++++++++-----
 softmmu_template.h     |   82 ++++++++++++++++++++----------
 tcg/arm/tcg-target.c   |   53 +++++++++++++++++++
 tcg/hppa/tcg-target.c  |   44 ++++++++++++++++
 tcg/i386/tcg-target.c  |  130 +++++++++++++++++++++++++++++++++++++++---------
 tcg/ia64/tcg-target.c  |   46 +++++++++++++++++
 tcg/mips/tcg-target.c  |   44 ++++++++++++++++
 tcg/ppc/tcg-target.c   |   45 +++++++++++++++++
 tcg/ppc64/tcg-target.c |   44 ++++++++++++++++
 tcg/s390/tcg-target.c  |   44 ++++++++++++++++
 tcg/sparc/tcg-target.c |   70 ++++++++++++++++++++------
 tcg/tci/tcg-target.c   |    6 ++
 16 files changed, 624 insertions(+), 79 deletions(-)

diff --git a/cpu-all.h b/cpu-all.h
index e2c3c49..53aa008 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -259,12 +259,21 @@ extern unsigned long reserved_va;
 #define stfl(p, v) stfl_raw(p, v)
 #define stfq(p, v) stfq_raw(p, v)
 
+#ifndef CONFIG_TCG_PASS_AREG0
 #define ldub_code(p) ldub_raw(p)
 #define ldsb_code(p) ldsb_raw(p)
 #define lduw_code(p) lduw_raw(p)
 #define ldsw_code(p) ldsw_raw(p)
 #define ldl_code(p) ldl_raw(p)
 #define ldq_code(p) ldq_raw(p)
+#else
+#define cpu_ldub_code(env1, p) ldub_raw(p)
+#define cpu_ldsb_code(env1, p) ldsb_raw(p)
+#define cpu_lduw_code(env1, p) lduw_raw(p)
+#define cpu_ldsw_code(env1, p) ldsw_raw(p)
+#define cpu_ldl_code(env1, p) ldl_raw(p)
+#define cpu_ldq_code(env1, p) ldq_raw(p)
+#endif
 
 #define ldub_kernel(p) ldub_raw(p)
 #define ldsb_kernel(p) ldsb_raw(p)
diff --git a/exec-all.h b/exec-all.h
index 51d01f2..88a57f7 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -311,7 +311,9 @@ void tlb_fill(CPUState *env1, target_ulong addr, int is_write, int mmu_idx,
 
 #define ACCESS_TYPE (NB_MMU_MODES + 1)
 #define MEMSUFFIX _code
+#ifndef CONFIG_TCG_PASS_AREG0
 #define env cpu_single_env
+#endif
 
 #define DATA_SIZE 1
 #include "softmmu_header.h"
diff --git a/exec.c b/exec.c
index b1d6602..fd4ba4e 100644
--- a/exec.c
+++ b/exec.c
@@ -4375,7 +4375,11 @@ tb_page_addr_t get_page_addr_code(CPUState *env1, target_ulong addr)
     mmu_idx = cpu_mmu_index(env1);
     if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
                  (addr & TARGET_PAGE_MASK))) {
+#ifdef CONFIG_TCG_PASS_AREG0
+        cpu_ldub_code(env1, addr);
+#else
         ldub_code(addr);
+#endif
     }
     pd = env1->tlb_table[mmu_idx][page_index].addr_code & ~TARGET_PAGE_MASK;
     if (pd != io_mem_ram.ram_addr && pd != io_mem_rom.ram_addr
diff --git a/softmmu_defs.h b/softmmu_defs.h
index c5a2bcd..ba949e3 100644
--- a/softmmu_defs.h
+++ b/softmmu_defs.h
@@ -9,6 +9,7 @@
 #ifndef SOFTMMU_DEFS_H
 #define SOFTMMU_DEFS_H
 
+#ifndef CONFIG_TCG_PASS_AREG0
 uint8_t REGPARM __ldb_mmu(target_ulong addr, int mmu_idx);
 void REGPARM __stb_mmu(target_ulong addr, uint8_t val, int mmu_idx);
 uint16_t REGPARM __ldw_mmu(target_ulong addr, int mmu_idx);
@@ -26,5 +27,24 @@ uint32_t REGPARM __ldl_cmmu(target_ulong addr, int mmu_idx);
 void REGPARM __stl_cmmu(target_ulong addr, uint32_t val, int mmu_idx);
 uint64_t REGPARM __ldq_cmmu(target_ulong addr, int mmu_idx);
 void REGPARM __stq_cmmu(target_ulong addr, uint64_t val, int mmu_idx);
+#else
+uint8_t REGPARM helper_ldb_mmu(CPUState *env, target_ulong addr, int mmu_idx);
+void REGPARM helper_stb_mmu(CPUState *env, target_ulong addr, uint8_t val, int mmu_idx);
+uint16_t REGPARM helper_ldw_mmu(CPUState *env, target_ulong addr, int mmu_idx);
+void REGPARM helper_stw_mmu(CPUState *env, target_ulong addr, uint16_t val, int mmu_idx);
+uint32_t REGPARM helper_ldl_mmu(CPUState *env, target_ulong addr, int mmu_idx);
+void REGPARM helper_stl_mmu(CPUState *env, target_ulong addr, uint32_t val, int mmu_idx);
+uint64_t REGPARM helper_ldq_mmu(CPUState *env, target_ulong addr, int mmu_idx);
+void REGPARM helper_stq_mmu(CPUState *env, target_ulong addr, uint64_t val, int mmu_idx);
+
+uint8_t REGPARM helper_ldb_cmmu(CPUState *env, target_ulong addr, int mmu_idx);
+void REGPARM helper_stb_cmmu(CPUState *env, target_ulong addr, uint8_t val, int mmu_idx);
+uint16_t REGPARM helper_ldw_cmmu(CPUState *env, target_ulong addr, int mmu_idx);
+void REGPARM helper_stw_cmmu(CPUState *env, target_ulong addr, uint16_t val, int mmu_idx);
+uint32_t REGPARM helper_ldl_cmmu(CPUState *env, target_ulong addr, int mmu_idx);
+void REGPARM helper_stl_cmmu(CPUState *env, target_ulong addr, uint32_t val, int mmu_idx);
+uint64_t REGPARM helper_ldq_cmmu(CPUState *env, target_ulong addr, int mmu_idx);
+void REGPARM helper_stq_cmmu(CPUState *env, target_ulong addr, uint64_t val, int mmu_idx);
+#endif
 
 #endif
diff --git a/softmmu_header.h b/softmmu_header.h
index 818d7b6..3c0b298 100644
--- a/softmmu_header.h
+++ b/softmmu_header.h
@@ -78,9 +78,23 @@
 #define ADDR_READ addr_read
 #endif
 
+#ifndef CONFIG_TCG_PASS_AREG0
+#define ENV_PARAM
+#define ENV_VAR
+#define CPU_PREFIX
+#define HELPER_PREFIX __
+#else
+#define ENV_PARAM CPUState *env,
+#define ENV_VAR env,
+#define CPU_PREFIX cpu_
+#define HELPER_PREFIX helper_
+#endif
+
 /* generic load/store macros */
 
-static inline RES_TYPE glue(glue(ld, USUFFIX), MEMSUFFIX)(target_ulong ptr)
+static inline RES_TYPE
+glue(glue(glue(CPU_PREFIX, ld), USUFFIX), MEMSUFFIX)(ENV_PARAM
+                                                     target_ulong ptr)
 {
     int page_index;
     RES_TYPE res;
@@ -93,7 +107,9 @@ static inline RES_TYPE glue(glue(ld, USUFFIX), MEMSUFFIX)(target_ulong ptr)
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        res = glue(glue(__ld, SUFFIX), MMUSUFFIX)(addr, mmu_idx);
+        res = glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_VAR
+                                                                     addr,
+                                                                     mmu_idx);
     } else {
         physaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)physaddr);
@@ -102,7 +118,9 @@ static inline RES_TYPE glue(glue(ld, USUFFIX), MEMSUFFIX)(target_ulong ptr)
 }
 
 #if DATA_SIZE <= 2
-static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
+static inline int
+glue(glue(glue(CPU_PREFIX, lds), SUFFIX), MEMSUFFIX)(ENV_PARAM
+                                                     target_ulong ptr)
 {
     int res, page_index;
     target_ulong addr;
@@ -114,7 +132,8 @@ static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        res = (DATA_STYPE)glue(glue(__ld, SUFFIX), MMUSUFFIX)(addr, mmu_idx);
+        res = (DATA_STYPE)glue(glue(glue(HELPER_PREFIX, ld), SUFFIX),
+                               MMUSUFFIX)(ENV_VAR addr, mmu_idx);
     } else {
         physaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         res = glue(glue(lds, SUFFIX), _raw)((uint8_t *)physaddr);
@@ -127,7 +146,9 @@ static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
 
 /* generic store macro */
 
-static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v)
+static inline void
+glue(glue(glue(CPU_PREFIX, st), SUFFIX), MEMSUFFIX)(ENV_PARAM target_ulong ptr,
+                                                    RES_TYPE v)
 {
     int page_index;
     target_ulong addr;
@@ -139,7 +160,8 @@ static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        glue(glue(__st, SUFFIX), MMUSUFFIX)(addr, v, mmu_idx);
+        glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_VAR addr, v,
+                                                               mmu_idx);
     } else {
         physaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         glue(glue(st, SUFFIX), _raw)((uint8_t *)physaddr, v);
@@ -151,46 +173,52 @@ static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE
 #if ACCESS_TYPE != (NB_MMU_MODES + 1)
 
 #if DATA_SIZE == 8
-static inline float64 glue(ldfq, MEMSUFFIX)(target_ulong ptr)
+static inline float64 glue(glue(CPU_PREFIX, ldfq), MEMSUFFIX)(ENV_PARAM
+                                                              target_ulong ptr)
 {
     union {
         float64 d;
         uint64_t i;
     } u;
-    u.i = glue(ldq, MEMSUFFIX)(ptr);
+    u.i = glue(glue(CPU_PREFIX, ldq), MEMSUFFIX)(ENV_VAR ptr);
     return u.d;
 }
 
-static inline void glue(stfq, MEMSUFFIX)(target_ulong ptr, float64 v)
+static inline void glue(glue(CPU_PREFIX, stfq), MEMSUFFIX)(ENV_PARAM
+                                                           target_ulong ptr,
+                                                           float64 v)
 {
     union {
         float64 d;
         uint64_t i;
     } u;
     u.d = v;
-    glue(stq, MEMSUFFIX)(ptr, u.i);
+    glue(glue(CPU_PREFIX, stq), MEMSUFFIX)(ENV_VAR ptr, u.i);
 }
 #endif /* DATA_SIZE == 8 */
 
 #if DATA_SIZE == 4
-static inline float32 glue(ldfl, MEMSUFFIX)(target_ulong ptr)
+static inline float32 glue(glue(CPU_PREFIX, ldfl), MEMSUFFIX)(ENV_PARAM
+                                                              target_ulong ptr)
 {
     union {
         float32 f;
         uint32_t i;
     } u;
-    u.i = glue(ldl, MEMSUFFIX)(ptr);
+    u.i = glue(glue(CPU_PREFIX, ldl), MEMSUFFIX)(ENV_VAR ptr);
     return u.f;
 }
 
-static inline void glue(stfl, MEMSUFFIX)(target_ulong ptr, float32 v)
+static inline void glue(glue(CPU_PREFIX, stfl), MEMSUFFIX)(ENV_PARAM
+                                                           target_ulong ptr,
+                                                           float32 v)
 {
     union {
         float32 f;
         uint32_t i;
     } u;
     u.f = v;
-    glue(stl, MEMSUFFIX)(ptr, u.i);
+    glue(glue(CPU_PREFIX, stl), MEMSUFFIX)(ENV_VAR ptr, u.i);
 }
 #endif /* DATA_SIZE == 4 */
 
@@ -205,3 +233,7 @@ static inline void glue(stfl, MEMSUFFIX)(target_ulong ptr, float32 v)
 #undef CPU_MMU_INDEX
 #undef MMUSUFFIX
 #undef ADDR_READ
+#undef ENV_PARAM
+#undef ENV_VAR
+#undef CPU_PREFIX
+#undef HELPER_PREFIX
diff --git a/softmmu_template.h b/softmmu_template.h
index 97020f8..644604e 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -54,10 +54,24 @@
 #define ADDR_READ addr_read
 #endif
 
-static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
+#ifndef CONFIG_TCG_PASS_AREG0
+#define ENV_PARAM
+#define ENV_VAR
+#define CPU_PREFIX
+#define HELPER_PREFIX __
+#else
+#define ENV_PARAM CPUState *env,
+#define ENV_VAR env,
+#define CPU_PREFIX cpu_
+#define HELPER_PREFIX helper_
+#endif
+
+static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                        target_ulong addr,
                                                         int mmu_idx,
                                                         void *retaddr);
-static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
+static inline DATA_TYPE glue(io_read, SUFFIX)(ENV_PARAM
+                                              target_phys_addr_t physaddr,
                                               target_ulong addr,
                                               void *retaddr)
 {
@@ -89,8 +103,10 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
 }
 
 /* handle all cases except unaligned access which span two pages */
-DATA_TYPE REGPARM glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
-                                                      int mmu_idx)
+DATA_TYPE REGPARM
+glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                       target_ulong addr,
+                                                       int mmu_idx)
 {
     DATA_TYPE res;
     int index;
@@ -111,22 +127,22 @@ DATA_TYPE REGPARM glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
                 goto do_unaligned_access;
             retaddr = GETPC();
             ioaddr = env->iotlb[mmu_idx][index];
-            res = glue(io_read, SUFFIX)(ioaddr, addr, retaddr);
+            res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
             /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
-            do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+            do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
 #endif
-            res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr,
+            res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr,
                                                          mmu_idx, retaddr);
         } else {
             /* unaligned/aligned access in the same page */
 #ifdef ALIGNED_ONLY
             if ((addr & (DATA_SIZE - 1)) != 0) {
                 retaddr = GETPC();
-                do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+                do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
             }
 #endif
             addend = env->tlb_table[mmu_idx][index].addend;
@@ -137,7 +153,7 @@ DATA_TYPE REGPARM glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
         retaddr = GETPC();
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0)
-            do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+            do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
 #endif
         tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
         goto redo;
@@ -146,7 +162,8 @@ DATA_TYPE REGPARM glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
 }
 
 /* handle all unaligned cases */
-static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
+static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                        target_ulong addr,
                                                         int mmu_idx,
                                                         void *retaddr)
 {
@@ -165,15 +182,15 @@ static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
             ioaddr = env->iotlb[mmu_idx][index];
-            res = glue(io_read, SUFFIX)(ioaddr, addr, retaddr);
+            res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* slow unaligned access (it spans two pages) */
             addr1 = addr & ~(DATA_SIZE - 1);
             addr2 = addr1 + DATA_SIZE;
-            res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr1,
+            res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr1,
                                                           mmu_idx, retaddr);
-            res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
+            res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr2,
                                                           mmu_idx, retaddr);
             shift = (addr & (DATA_SIZE - 1)) * 8;
 #ifdef TARGET_WORDS_BIGENDIAN
@@ -197,12 +214,14 @@ static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
 
 #ifndef SOFTMMU_CODE_ACCESS
 
-static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
+static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                   target_ulong addr,
                                                    DATA_TYPE val,
                                                    int mmu_idx,
                                                    void *retaddr);
 
-static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr,
+static inline void glue(io_write, SUFFIX)(ENV_PARAM
+                                          target_phys_addr_t physaddr,
                                           DATA_TYPE val,
                                           target_ulong addr,
                                           void *retaddr)
@@ -232,9 +251,11 @@ static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr,
 #endif /* SHIFT > 2 */
 }
 
-void REGPARM glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr,
-                                                 DATA_TYPE val,
-                                                 int mmu_idx)
+void REGPARM
+glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                       target_ulong addr,
+                                                       DATA_TYPE val,
+                                                       int mmu_idx)
 {
     target_phys_addr_t ioaddr;
     unsigned long addend;
@@ -252,21 +273,21 @@ void REGPARM glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr,
                 goto do_unaligned_access;
             retaddr = GETPC();
             ioaddr = env->iotlb[mmu_idx][index];
-            glue(io_write, SUFFIX)(ioaddr, val, addr, retaddr);
+            glue(io_write, SUFFIX)(ENV_VAR ioaddr, val, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
-            do_unaligned_access(addr, 1, mmu_idx, retaddr);
+            do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
 #endif
-            glue(glue(slow_st, SUFFIX), MMUSUFFIX)(addr, val,
+            glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_VAR addr, val,
                                                    mmu_idx, retaddr);
         } else {
             /* aligned/unaligned access in the same page */
 #ifdef ALIGNED_ONLY
             if ((addr & (DATA_SIZE - 1)) != 0) {
                 retaddr = GETPC();
-                do_unaligned_access(addr, 1, mmu_idx, retaddr);
+                do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
             }
 #endif
             addend = env->tlb_table[mmu_idx][index].addend;
@@ -277,7 +298,7 @@ void REGPARM glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr,
         retaddr = GETPC();
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0)
-            do_unaligned_access(addr, 1, mmu_idx, retaddr);
+            do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
 #endif
         tlb_fill(env, addr, 1, mmu_idx, retaddr);
         goto redo;
@@ -285,7 +306,8 @@ void REGPARM glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr,
 }
 
 /* handles all unaligned cases */
-static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
+static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_PARAM
+                                                   target_ulong addr,
                                                    DATA_TYPE val,
                                                    int mmu_idx,
                                                    void *retaddr)
@@ -304,7 +326,7 @@ static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
             ioaddr = env->iotlb[mmu_idx][index];
-            glue(io_write, SUFFIX)(ioaddr, val, addr, retaddr);
+            glue(io_write, SUFFIX)(ENV_VAR ioaddr, val, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* XXX: not efficient, but simple */
@@ -312,10 +334,12 @@ static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
              * previous page from the TLB cache.  */
             for(i = DATA_SIZE - 1; i >= 0; i--) {
 #ifdef TARGET_WORDS_BIGENDIAN
-                glue(slow_stb, MMUSUFFIX)(addr + i, val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
+                glue(slow_stb, MMUSUFFIX)(ENV_VAR addr + i,
+                                          val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
                                           mmu_idx, retaddr);
 #else
-                glue(slow_stb, MMUSUFFIX)(addr + i, val >> (i * 8),
+                glue(slow_stb, MMUSUFFIX)(ENV_VAR addr + i,
+                                          val >> (i * 8),
                                           mmu_idx, retaddr);
 #endif
             }
@@ -340,3 +364,7 @@ static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
 #undef USUFFIX
 #undef DATA_SIZE
 #undef ADDR_READ
+#undef ENV_PARAM
+#undef ENV_VAR
+#undef CPU_PREFIX
+#undef HELPER_PREFIX
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 1d32798..e6565f6 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -929,6 +929,27 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index)
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -936,6 +957,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -943,6 +966,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif
 
 #define TLB_SHIFT	(CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
 
@@ -1075,6 +1099,19 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                     TCG_REG_R1, 0, addr_reg2, SHIFT_IMM_LSL(0));
     tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index);
 # endif
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 bit */
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[2], 0,
+                    tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[1], 0,
+                    tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0));
+
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[0], 0, TCG_AREG0,
+                    SHIFT_IMM_LSL(0));
+#endif
     tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]);
 
     switch (opc) {
@@ -1341,6 +1378,22 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     }
 # endif
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 bit */
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[3], 0,
+                    tcg_target_call_iarg_regs[2], SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[2], 0,
+                    tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[1], 0,
+                    tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0));
+
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[0], 0, TCG_AREG0,
+                    SHIFT_IMM_LSL(0));
+#endif
     tcg_out_call(s, (tcg_target_long) qemu_st_helpers[s_bits]);
     if (opc == 3)
         tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R13, TCG_REG_R13, 0x10);
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index 59d4d12..683a496 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -882,6 +882,27 @@ static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret,
 #if defined(CONFIG_SOFTMMU)
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -889,12 +910,15 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif
 
 /* Load and compare a TLB entry, and branch if TLB miss.  OFFSET is set to
    the offset of the first ADDR_READ or ADDR_WRITE member of the appropriate
@@ -1061,6 +1085,15 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     }
     tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call(s, qemu_ld_helpers[opc & 3]);
 
     switch (opc) {
@@ -1212,6 +1245,17 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
         tcg_abort();
     }
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call(s, qemu_st_helpers[opc]);
 
     /* label2: */
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index c74bd1a..83331fc 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -61,6 +61,12 @@ static const int tcg_target_reg_alloc_order[] = {
 #endif
 };
 
+#ifdef CONFIG_TCG_PASS_AREG0
+#define ARG_OFFSET 1
+#else
+#define ARG_OFFSET 0
+#endif
+
 static const int tcg_target_call_iarg_regs[] = {
 #if TCG_TARGET_REG_BITS == 64
     TCG_REG_RDI,
@@ -188,10 +194,16 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
             tcg_regset_set32(ct->u.regs, 0, 0xffff);
             tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
             tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
+#ifdef CONFIG_TCG_PASS_AREG0
+            tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
+#endif
         } else {
             tcg_regset_set32(ct->u.regs, 0, 0xff);
             tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
             tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
+#ifdef CONFIG_TCG_PASS_AREG0
+            tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
+#endif
         }
         break;
 
@@ -967,6 +979,27 @@ static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void *qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void *qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -974,12 +1007,15 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif
 
 /* Perform the TLB load and compare.
 
@@ -1009,8 +1045,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
                                     uint8_t **label_ptr, int which)
 {
     const int addrlo = args[addrlo_idx];
-    const int r0 = tcg_target_call_iarg_regs[0];
-    const int r1 = tcg_target_call_iarg_regs[1];
+    const int r0 = tcg_target_call_iarg_regs[ARG_OFFSET + 0];
+    const int r1 = tcg_target_call_iarg_regs[ARG_OFFSET + 1];
     TCGType type = TCG_TYPE_I32;
     int rexw = 0;
 
@@ -1168,7 +1204,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 
     /* TLB Hit.  */
     tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
-                           tcg_target_call_iarg_regs[0], 0, opc);
+                           tcg_target_call_iarg_regs[ARG_OFFSET + 0], 0, opc);
 
     /* jmp label2 */
     tcg_out8(s, OPC_JMP_short);
@@ -1184,13 +1220,17 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     }
 
     /* XXX: move that code at the end of the TB */
-    /* The first argument is already loaded with addrlo.  */
+#ifdef CONFIG_TCG_PASS_AREG0
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+#endif
+    /* The second (or first for legacy helper) argument is already
+       loaded with addrlo.  */
     arg_idx = 1;
     if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
-        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
+        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[ARG_OFFSET + arg_idx++],
                     args[addrlo_idx + 1]);
     }
-    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
+    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[ARG_OFFSET + arg_idx],
                  mem_index);
     tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
 
@@ -1275,7 +1315,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
        we could perform the bswap twice to restore the original value
        instead of moving to the scratch.  But as it is, the L constraint
        means that the second argument reg is definitely free here.  */
-    int scratch = tcg_target_call_iarg_regs[1];
+    int scratch = tcg_target_call_iarg_regs[ARG_OFFSET + 1];
 
     switch (sizeop) {
     case 0:
@@ -1349,7 +1389,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 
     /* TLB Hit.  */
     tcg_out_qemu_st_direct(s, data_reg, data_reg2,
-                           tcg_target_call_iarg_regs[0], 0, opc);
+                           tcg_target_call_iarg_regs[ARG_OFFSET + 0], 0, opc);
 
     /* jmp label2 */
     tcg_out8(s, OPC_JMP_short);
@@ -1365,49 +1405,93 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     }
 
     /* XXX: move that code at the end of the TB */
+#ifdef CONFIG_TCG_PASS_AREG0
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+#endif
     if (TCG_TARGET_REG_BITS == 64) {
         tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
-                    tcg_target_call_iarg_regs[1], data_reg);
-        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
-                     mem_index);
+                    tcg_target_call_iarg_regs[ARG_OFFSET + 1], data_reg);
+#if defined(CONFIG_TCG_PASS_AREG0) && (TCG_TARGET_REG_BITS == 32)
+        tcg_out_pushi(s, mem_index);
+        stack_adjust = 4;
+#else
+        tcg_out_movi(s, TCG_TYPE_I32,
+                     tcg_target_call_iarg_regs[ARG_OFFSET + 2], mem_index);
         stack_adjust = 0;
+#endif
     } else if (TARGET_LONG_BITS == 32) {
-        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], data_reg);
+        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[ARG_OFFSET + 1],
+                    data_reg);
         if (opc == 3) {
-            tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
-                        data_reg2);
+#if defined(CONFIG_TCG_PASS_AREG0) && (TCG_TARGET_REG_BITS == 32)
+            tcg_out_pushi(s, data_reg2);
+            tcg_out_pushi(s, mem_index);
+            stack_adjust = 8;
+#else
+            tcg_out_mov(s, TCG_TYPE_I32,
+                        tcg_target_call_iarg_regs[ARG_OFFSET + 2], data_reg2);
             tcg_out_pushi(s, mem_index);
             stack_adjust = 4;
+#endif
         } else {
-            tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
-                         mem_index);
+#if defined(CONFIG_TCG_PASS_AREG0) && (TCG_TARGET_REG_BITS == 32)
+            tcg_out_pushi(s, mem_index);
+            stack_adjust = 4;
+#else
+            tcg_out_movi(s, TCG_TYPE_I32,
+                         tcg_target_call_iarg_regs[ARG_OFFSET + 2], mem_index);
             stack_adjust = 0;
+#endif
         }
     } else {
         if (opc == 3) {
-            tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1],
+            tcg_out_mov(s, TCG_TYPE_I32,
+                        tcg_target_call_iarg_regs[ARG_OFFSET + 1],
                         args[addrlo_idx + 1]);
             tcg_out_pushi(s, mem_index);
             tcg_out_push(s, data_reg2);
             tcg_out_push(s, data_reg);
             stack_adjust = 12;
         } else {
-            tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1],
-                        args[addrlo_idx + 1]);
             switch(opc) {
             case 0:
-                tcg_out_ext8u(s, tcg_target_call_iarg_regs[2], data_reg);
+#if defined(CONFIG_TCG_PASS_AREG0) && (TCG_TARGET_REG_BITS == 32)
+                tcg_out_ext8u(s, tcg_target_call_iarg_regs[ARG_OFFSET + 1],
+                              data_reg);
+                tcg_out_pushi(s, tcg_target_call_iarg_regs[ARG_OFFSET + 1]);
+                stack_adjust = 4;
+#else
+                tcg_out_ext8u(s, tcg_target_call_iarg_regs[ARG_OFFSET + 2],
+                              data_reg);
+#endif
                 break;
             case 1:
-                tcg_out_ext16u(s, tcg_target_call_iarg_regs[2], data_reg);
+#if defined(CONFIG_TCG_PASS_AREG0) && (TCG_TARGET_REG_BITS == 32)
+                tcg_out_ext16u(s, tcg_target_call_iarg_regs[ARG_OFFSET + 1],
+                               data_reg);
+                tcg_out_pushi(s, tcg_target_call_iarg_regs[ARG_OFFSET + 1]);
+                stack_adjust = 4;
+#else
+                tcg_out_ext16u(s, tcg_target_call_iarg_regs[ARG_OFFSET + 2],
+                               data_reg);
+#endif
                 break;
             case 2:
-                tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+#if defined(CONFIG_TCG_PASS_AREG0) && (TCG_TARGET_REG_BITS == 32)
+                tcg_out_pushi(s, data_reg);
+                stack_adjust = 4;
+#else
+                tcg_out_mov(s, TCG_TYPE_I32,
+                            tcg_target_call_iarg_regs[ARG_OFFSET + 2],
                             data_reg);
+#endif
                 break;
             }
+            tcg_out_mov(s, TCG_TYPE_I32,
+                        tcg_target_call_iarg_regs[ARG_OFFSET + 1],
+                        args[addrlo_idx + 1]);
             tcg_out_pushi(s, mem_index);
-            stack_adjust = 4;
+            stack_adjust += 4;
         }
     }
 
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index e3de79f..213e9d8 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1452,12 +1452,25 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
                                TCG_REG_P7, TCG_REG_R3, TCG_REG_R57));
 }
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
     __ldl_mmu,
     __ldq_mmu,
 };
+#endif
 
 static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 {
@@ -1517,6 +1530,15 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     }
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     if (!bswap || s_bits == 0) {
         tcg_out_bundle(s, miB,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
@@ -1547,12 +1569,25 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     }
 }
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif
 
 static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 {
@@ -1622,6 +1657,17 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
         data_reg = TCG_REG_R2;
     }
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_bundle(s, miB,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
                                data_reg, TCG_REG_R3),
diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index c5c3282..3681141 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -750,6 +750,27 @@ static void tcg_out_setcond2(TCGContext *s, TCGCond cond, int ret,
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -757,6 +778,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -764,6 +787,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif
 
 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                             int opc)
@@ -858,6 +882,15 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 # endif
     tcg_out_movi(s, TCG_TYPE_I32, sp_args++, mem_index);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T9, (tcg_target_long)qemu_ld_helpers[s_bits]);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 on 32 bit */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0);
     tcg_out_nop(s);
 
@@ -1069,6 +1102,17 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     }
 
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T9, (tcg_target_long)qemu_st_helpers[s_bits]);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 on 32 bit */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0);
     tcg_out_nop(s);
 
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index f5d9bf3..d0702d6 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -508,6 +508,27 @@ static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg)
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -515,6 +536,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -522,6 +545,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif
 
 static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
 {
@@ -598,6 +622,16 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
     tcg_out_movi (s, TCG_TYPE_I32, 5, mem_index);
 #endif
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
+
     tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
     switch (opc) {
     case 0|4:
@@ -829,6 +863,17 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
     ir++;
 
     tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call (s, (tcg_target_long) qemu_st_helpers[opc], 1);
     label2_ptr = s->code_ptr;
     tcg_out32 (s, B);
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 4419378..d9fb409 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -552,6 +552,27 @@ static void tcg_out_ldsta (TCGContext *s, int ret, int addr,
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -559,12 +580,15 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif
 
 static void tcg_out_tlb_read (TCGContext *s, int r0, int r1, int r2,
                               int addr_reg, int s_bits, int offset)
@@ -648,6 +672,15 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
     tcg_out_mov (s, TCG_TYPE_I64, 3, addr_reg);
     tcg_out_movi (s, TCG_TYPE_I64, 4, mem_index);
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
 
     switch (opc) {
@@ -796,6 +829,17 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
     tcg_out_rld (s, RLDICL, 4, data_reg, 0, 64 - (1 << (3 + opc)));
     tcg_out_movi (s, TCG_TYPE_I64, 5, mem_index);
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call (s, (tcg_target_long) qemu_st_helpers[opc], 1);
 
     label2_ptr = s->code_ptr;
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 9317fe8..3695213 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -301,6 +301,27 @@ static const uint8_t tcg_cond_to_ltr_cond[10] = {
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -308,6 +329,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -315,6 +338,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif
 
 static uint8_t *tb_ret_addr;
 
@@ -1483,9 +1507,29 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
             tcg_abort();
         }
         tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+        /* XXX/FIXME: suboptimal */
+        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                    tcg_target_call_iarg_regs[2]);
+        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                    tcg_target_call_iarg_regs[1]);
+        tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                    tcg_target_call_iarg_regs[0]);
+        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                    TCG_AREG0);
+#endif
         tgen_calli(s, (tcg_target_ulong)qemu_st_helpers[s_bits]);
     } else {
         tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+        /* XXX/FIXME: suboptimal */
+        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                    tcg_target_call_iarg_regs[1]);
+        tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                    tcg_target_call_iarg_regs[0]);
+        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                    TCG_AREG0);
+#endif
         tgen_calli(s, (tcg_target_ulong)qemu_ld_helpers[s_bits]);
 
         /* sign extension */
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 6f69e56..5a8f9d2 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -59,6 +59,12 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 };
 #endif
 
+#ifdef CONFIG_TCG_PASS_AREG0
+#define ARG_OFFSET 1
+#else
+#define ARG_OFFSET 0
+#endif
+
 static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_L0,
     TCG_REG_L1,
@@ -88,6 +94,7 @@ static const int tcg_target_call_oarg_regs[] = {
     TCG_REG_O0,
     TCG_REG_O1,
     TCG_REG_O2,
+    TCG_REG_O3,
 };
 
 static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
@@ -154,6 +161,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         tcg_regset_reset_reg(ct->u.regs, tcg_target_call_oarg_regs[0]);
         tcg_regset_reset_reg(ct->u.regs, tcg_target_call_oarg_regs[1]);
         tcg_regset_reset_reg(ct->u.regs, tcg_target_call_oarg_regs[2]);
+#ifdef CONFIG_TCG_PASS_AREG0
+        tcg_regset_reset_reg(ct->u.regs, tcg_target_call_oarg_regs[3]);
+#endif
         break;
     case 'I':
         ct->ct |= TCG_CT_CONST_S11;
@@ -705,6 +715,27 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -712,6 +743,8 @@ static const void * const qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static const void * const qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -719,6 +752,7 @@ static const void * const qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif
 
 #if TARGET_LONG_BITS == 32
 #define TARGET_LD_OP LDUW
@@ -759,9 +793,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     mem_index = *args;
     s_bits = opc & 3;
 
-    arg0 = tcg_target_call_iarg_regs[0];
-    arg1 = tcg_target_call_iarg_regs[1];
-    arg2 = tcg_target_call_iarg_regs[2];
+    arg0 = tcg_target_call_iarg_regs[ARG_OFFSET + 0];
+    arg1 = tcg_target_call_iarg_regs[ARG_OFFSET + 1];
+    arg2 = tcg_target_call_iarg_regs[ARG_OFFSET + 2];
 
 #if defined(CONFIG_SOFTMMU)
     /* srl addr_reg, x, arg1 */
@@ -796,10 +830,12 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     tcg_out32(s, 0);
 
     /* mov (delay slot) */
-    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], addr_reg);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[ARG_OFFSET + 0],
+                addr_reg);
 
     /* mov */
-    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], mem_index);
+    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[ARG_OFFSET + 1],
+                 mem_index);
 
     /* XXX: move that code at the end of the TB */
     /* qemu_ld_helper[s_bits](arg0, arg1) */
@@ -820,7 +856,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     switch(opc) {
     case 0 | 4:
         /* sll arg0, 24/56, data_reg */
-        tcg_out_arithi(s, data_reg, tcg_target_call_oarg_regs[0],
+        tcg_out_arithi(s, data_reg, tcg_target_call_oarg_regs[ARG_OFFSET + 0],
                        (int)sizeof(tcg_target_long) * 8 - 8, HOST_SLL_OP);
         /* sra data_reg, 24/56, data_reg */
         tcg_out_arithi(s, data_reg, data_reg,
@@ -828,7 +864,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         break;
     case 1 | 4:
         /* sll arg0, 16/48, data_reg */
-        tcg_out_arithi(s, data_reg, tcg_target_call_oarg_regs[0],
+        tcg_out_arithi(s, data_reg, tcg_target_call_oarg_regs[ARG_OFFSET + 0],
                        (int)sizeof(tcg_target_long) * 8 - 16, HOST_SLL_OP);
         /* sra data_reg, 16/48, data_reg */
         tcg_out_arithi(s, data_reg, data_reg,
@@ -836,7 +872,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         break;
     case 2 | 4:
         /* sll arg0, 32, data_reg */
-        tcg_out_arithi(s, data_reg, tcg_target_call_oarg_regs[0],
+        tcg_out_arithi(s, data_reg, tcg_target_call_oarg_regs[ARG_OFFSET + 0],
                        32, HOST_SLL_OP);
         /* sra data_reg, 32, data_reg */
         tcg_out_arithi(s, data_reg, data_reg, 32, HOST_SRA_OP);
@@ -847,7 +883,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     case 3:
     default:
         /* mov */
-        tcg_out_mov(s, TCG_TYPE_REG, data_reg, tcg_target_call_oarg_regs[0]);
+        tcg_out_mov(s, TCG_TYPE_REG, data_reg,
+                    tcg_target_call_oarg_regs[ARG_OFFSET + 0]);
         break;
     }
 
@@ -971,9 +1008,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 
     s_bits = opc;
 
-    arg0 = tcg_target_call_iarg_regs[0];
-    arg1 = tcg_target_call_iarg_regs[1];
-    arg2 = tcg_target_call_iarg_regs[2];
+    arg0 = tcg_target_call_iarg_regs[ARG_OFFSET + 0];
+    arg1 = tcg_target_call_iarg_regs[ARG_OFFSET + 1];
+    arg2 = tcg_target_call_iarg_regs[ARG_OFFSET + 2];
 
 #if defined(CONFIG_SOFTMMU)
     /* srl addr_reg, x, arg1 */
@@ -1009,13 +1046,16 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     tcg_out32(s, 0);
 
     /* mov (delay slot) */
-    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_oarg_regs[0], addr_reg);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_oarg_regs[ARG_OFFSET + 0],
+                addr_reg);
 
     /* mov */
-    tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_oarg_regs[1], data_reg);
+    tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_oarg_regs[ARG_OFFSET + 1],
+                data_reg);
 
     /* mov */
-    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_oarg_regs[2], mem_index);
+    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_oarg_regs[ARG_OFFSET + 2],
+                 mem_index);
 
     /* XXX: move that code at the end of the TB */
     /* qemu_st_helper[s_bits](arg0, arg1, arg2) */
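For reference, the two helper flavors the qemu_ld/st paths above now
dispatch to look like this. This is a minimal sketch following the
signature comments next to the helper tables; the real prototypes live
in softmmu_defs.h, only the 32-bit variants are shown, and REGPARM is
assumed to match the legacy declarations:

    /* Legacy flavor: the helper reads the global env. */
    uint32_t REGPARM __ldl_mmu(target_ulong addr, int mmu_idx);
    void REGPARM __stl_mmu(target_ulong addr, uint32_t val, int mmu_idx);

    /* CONFIG_TCG_PASS_AREG0 flavor: env is passed explicitly, so the
       backend must reserve the first argument register for it. */
    uint32_t REGPARM helper_ldl_mmu(CPUState *env, target_ulong addr,
                                    int mmu_idx);
    void REGPARM helper_stl_mmu(CPUState *env, target_ulong addr,
                                uint32_t val, int mmu_idx);
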
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index fc0880c..72c83c9 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -798,6 +798,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_qemu_st8:
     case INDEX_op_qemu_st16:
     case INDEX_op_qemu_st32:
+#ifdef CONFIG_TCG_PASS_AREG0
+        tcg_out_r(s, TCG_AREG0);
+#endif
         tcg_out_r(s, *args++);
         tcg_out_r(s, *args++);
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
@@ -808,6 +811,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 #endif
         break;
     case INDEX_op_qemu_st64:
+#ifdef CONFIG_TCG_PASS_AREG0
+        tcg_out_r(s, TCG_AREG0);
+#endif
         tcg_out_r(s, *args++);
 #if TCG_TARGET_REG_BITS == 32
         tcg_out_r(s, *args++);
-- 
1.7.2.5
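
On SPARC, the net effect of ARG_OFFSET is to slide the guest address,
the stored value and mmu_idx up by one argument register so that the
first slot stays free for env. A sketch of the resulting call layouts,
assuming the usual SPARC convention of passing arguments in %o0-%o5;
the env setup itself is not shown in the hunks above:

    /* CONFIG_TCG_PASS_AREG0 (ARG_OFFSET == 1):
     *   %o0 = env, %o1 = addr, %o2 = mem_index
     *   -> helper_ldl_mmu(env, addr, mem_index)
     *
     * legacy (ARG_OFFSET == 0):
     *   %o0 = addr, %o1 = mem_index
     *   -> __ldl_mmu(addr, mem_index)
     */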

