All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] RFC: reverse-endian softmmu memory accessors
@ 2007-10-13  9:56 J. Mayer
  2007-10-13 10:47 ` Blue Swirl
  0 siblings, 1 reply; 20+ messages in thread
From: J. Mayer @ 2007-10-13  9:56 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 2319 bytes --]

The problem:
Some CPU architectures, namely PowerPC and maybe others, offer
facilities to access memory or I/O in the reverse endianness, i.e.
little-endian instead of big-endian for PowerPC, or provide instructions
to make memory accesses in "reverse-endian" mode. This is implemented as
a global flag on some CPUs. This case is already handled by the PowerPC
emulation but it is far from being optimal. Some other implementations
allow the OS to store a "reverse-endian" flag in the TLB or the segment
descriptors, thus providing per-page or per-segment endianness control.
This is mostly used to ease driver migration from a PC platform to
PowerPC without taking any care of the device endianness in the driver
code (yes, this is bad...).

Proposal:
Here's a patch that implements "reverse-endian" low-level memory
accessors. It also provides an IO_MEM_REVERSE flag for TLBs. This flag is
handled in the I/O case of the softmmu low-level routines, which means that
it does not slow down "native-endian" memory accesses and only adds a
one-bit test on the "native-endian" I/O access, which should not be a
problem as I/O accesses are already slower, being handled via a
callback. As a side effect, this patch allows me to delete large parts of
target-ppc/op_mem.h and target-ppc/op_helper_mem.h, as it makes
little-endian memory accessors directly available. The translated
code for all little-endian accesses also becomes smaller, which even has a
visible effect on the mean translated block size (as reported by the
"info jit" monitor command), as lwbrx and lhbrx, which do memory
accesses with byteswap, are widely used in PowerPC code.

Warning:
This patch is to be taken as a proof of concept, for now. It works and
does not bring any visible regression to the PowerPC emulation, but it may
be buggy somewhere and it generates conflicts if applied against the
CPU_MMU_INDEX patch. It is very invasive in the PowerPC target code but
needs just a few additions for other targets. It also brings quite a lot of
changes in the softmmu headers but is supposed not to change the
"native-endian" paths (or it's a bug).
One will also notice that I added "reverse-endian" byte access
routines. Those are, in fact, not needed, but I left them in just for
consistency.

Please comment.

-- 
J. Mayer <l_indien@magic.fr>
Never organized

[-- Attachment #2: softmmu_reverse_endian.diff --]
[-- Type: text/x-patch, Size: 87071 bytes --]

Index: cpu-all.h
===================================================================
RCS file: /sources/qemu/qemu/cpu-all.h,v
retrieving revision 1.76
diff -u -d -d -p -r1.76 cpu-all.h
--- cpu-all.h	23 Sep 2007 15:28:03 -0000	1.76
+++ cpu-all.h	12 Oct 2007 07:14:43 -0000
@@ -161,7 +161,7 @@ typedef union {
  *
  * endian is:
  * (empty): target cpu endianness or 8 bit access
- *   r    : reversed target cpu endianness (not implemented yet)
+ *   r    : reversed target cpu endianness
  *   be   : big endian (not implemented yet)
  *   le   : little endian (not implemented yet)
  *
@@ -557,6 +557,7 @@ static inline void stfq_be_p(void *ptr, 
 
 /* target CPU memory access functions */
 #if defined(TARGET_WORDS_BIGENDIAN)
+/* native-endian */
 #define lduw_p(p) lduw_be_p(p)
 #define ldsw_p(p) ldsw_be_p(p)
 #define ldl_p(p) ldl_be_p(p)
@@ -568,7 +569,20 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_p(p, v) stq_be_p(p, v)
 #define stfl_p(p, v) stfl_be_p(p, v)
 #define stfq_p(p, v) stfq_be_p(p, v)
+/* reverse-endian */
+#define lduwr_p(p) lduw_le_p(p)
+#define ldswr_p(p) ldsw_le_p(p)
+#define ldlr_p(p) ldl_le_p(p)
+#define ldqr_p(p) ldq_le_p(p)
+#define ldflr_p(p) ldfl_le_p(p)
+#define ldfqr_p(p) ldfq_le_p(p)
+#define stwr_p(p, v) stw_le_p(p, v)
+#define stlr_p(p, v) stl_le_p(p, v)
+#define stqr_p(p, v) stq_le_p(p, v)
+#define stflr_p(p, v) stfl_le_p(p, v)
+#define stfqr_p(p, v) stfq_le_p(p, v)
 #else
+/* native-endian */
 #define lduw_p(p) lduw_le_p(p)
 #define ldsw_p(p) ldsw_le_p(p)
 #define ldl_p(p) ldl_le_p(p)
@@ -580,6 +594,18 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_p(p, v) stq_le_p(p, v)
 #define stfl_p(p, v) stfl_le_p(p, v)
 #define stfq_p(p, v) stfq_le_p(p, v)
+/* reverse-endian */
+#define lduwr_p(p) lduw_be_p(p)
+#define ldswr_p(p) ldsw_be_p(p)
+#define ldlr_p(p) ldl_be_p(p)
+#define ldqr_p(p) ldq_be_p(p)
+#define ldflr_p(p) ldfl_be_p(p)
+#define ldfqr_p(p) ldfq_be_p(p)
+#define stwr_p(p, v) stw_be_p(p, v)
+#define stlr_p(p, v) stl_be_p(p, v)
+#define stqr_p(p, v) stq_be_p(p, v)
+#define stflr_p(p, v) stfl_be_p(p, v)
+#define stfqr_p(p, v) stfq_be_p(p, v)
 #endif
 
 /* MMU memory access macros */
@@ -605,6 +631,7 @@ static inline void stfq_be_p(void *ptr, 
 #define laddr(x) (uint8_t *)(long)(x)
 #endif
 
+/* native-endian */
 #define ldub_raw(p) ldub_p(laddr((p)))
 #define ldsb_raw(p) ldsb_p(laddr((p)))
 #define lduw_raw(p) lduw_p(laddr((p)))
@@ -619,11 +646,26 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_raw(p, v) stq_p(saddr((p)), v)
 #define stfl_raw(p, v) stfl_p(saddr((p)), v)
 #define stfq_raw(p, v) stfq_p(saddr((p)), v)
-
+/* reverse endian */
+#define ldubr_raw(p) ldub_p(laddr((p)))
+#define ldsbr_raw(p) ldsb_p(laddr((p)))
+#define lduwr_raw(p) lduwr_p(laddr((p)))
+#define ldswr_raw(p) ldswr_p(laddr((p)))
+#define ldlr_raw(p) ldlr_p(laddr((p)))
+#define ldqr_raw(p) ldqr_p(laddr((p)))
+#define ldflr_raw(p) ldflr_p(laddr((p)))
+#define ldfqr_raw(p) ldfqr_p(laddr((p)))
+#define stbr_raw(p, v) stb_p(saddr((p)), v)
+#define stwr_raw(p, v) stwr_p(saddr((p)), v)
+#define stlr_raw(p, v) stlr_p(saddr((p)), v)
+#define stqr_raw(p, v) stqr_p(saddr((p)), v)
+#define stflr_raw(p, v) stflr_p(saddr((p)), v)
+#define stfqr_raw(p, v) stfqr_p(saddr((p)), v)
 
 #if defined(CONFIG_USER_ONLY)
 
 /* if user mode, no other memory access functions */
+/* native-endian */
 #define ldub(p) ldub_raw(p)
 #define ldsb(p) ldsb_raw(p)
 #define lduw(p) lduw_raw(p)
@@ -638,14 +680,38 @@ static inline void stfq_be_p(void *ptr, 
 #define stq(p, v) stq_raw(p, v)
 #define stfl(p, v) stfl_raw(p, v)
 #define stfq(p, v) stfq_raw(p, v)
+/* reverse-endian */
+#define ldubr(p) ldub_raw(p)
+#define ldsbr(p) ldsb_raw(p)
+#define lduwr(p) lduwr_raw(p)
+#define ldswr(p) ldswr_raw(p)
+#define ldlr(p) ldlr_raw(p)
+#define ldqr(p) ldqr_raw(p)
+#define ldflr(p) ldflr_raw(p)
+#define ldfqr(p) ldfqr_raw(p)
+#define stbr(p, v) stb_raw(p, v)
+#define stwr(p, v) stwr_raw(p, v)
+#define stlr(p, v) stlr_raw(p, v)
+#define stqr(p, v) stqr_raw(p, v)
+#define stflr(p, v) stflr_raw(p, v)
+#define stfqr(p, v) stfqr_raw(p, v)
 
+/* native-endian */
 #define ldub_code(p) ldub_raw(p)
 #define ldsb_code(p) ldsb_raw(p)
 #define lduw_code(p) lduw_raw(p)
 #define ldsw_code(p) ldsw_raw(p)
 #define ldl_code(p) ldl_raw(p)
 #define ldq_code(p) ldq_raw(p)
+/* reverse-endian */
+#define ldubr_code(p) ldub_raw(p)
+#define ldsbr_code(p) ldsb_raw(p)
+#define lduwr_code(p) lduwr_raw(p)
+#define ldswr_code(p) ldswr_raw(p)
+#define ldlr_code(p) ldlr_raw(p)
+#define ldqr_code(p) ldqr_raw(p)
 
+/* native-endian */
 #define ldub_kernel(p) ldub_raw(p)
 #define ldsb_kernel(p) ldsb_raw(p)
 #define lduw_kernel(p) lduw_raw(p)
@@ -660,6 +726,21 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_kernel(p, v) stq_raw(p, v)
 #define stfl_kernel(p, v) stfl_raw(p, v)
 #define stfq_kernel(p, vt) stfq_raw(p, v)
+/* reverse-endian */
+#define ldubr_kernel(p) ldub_raw(p)
+#define ldsbr_kernel(p) ldsb_raw(p)
+#define lduwr_kernel(p) lduwr_raw(p)
+#define ldswr_kernel(p) ldswr_raw(p)
+#define ldlr_kernel(p) ldlr_raw(p)
+#define ldqr_kernel(p) ldqr_raw(p)
+#define ldflr_kernel(p) ldflr_raw(p)
+#define ldfqr_kernel(p) ldfqr_raw(p)
+#define stbr_kernel(p, v) stbr_raw(p, v)
+#define stwr_kernel(p, v) stwr_raw(p, v)
+#define stlr_kernel(p, v) stlr_raw(p, v)
+#define stqr_kernel(p, v) stqr_raw(p, v)
+#define stflr_kernel(p, v) stflr_raw(p, v)
+#define stfqr_kernel(p, vt) stfqr_raw(p, v)
 
 #endif /* defined(CONFIG_USER_ONLY) */
 
@@ -790,6 +871,8 @@ extern uint8_t *phys_ram_dirty;
    the physical address */
 #define IO_MEM_ROMD        (1)
 #define IO_MEM_SUBPAGE     (2)
+/* On some target CPUs, endiannes is stored in page tables */
+#define IO_MEM_REVERSE     (3)
 
 typedef void CPUWriteMemoryFunc(void *opaque, target_phys_addr_t addr, uint32_t value);
 typedef uint32_t CPUReadMemoryFunc(void *opaque, target_phys_addr_t addr);
Index: exec-all.h
===================================================================
RCS file: /sources/qemu/qemu/exec-all.h,v
retrieving revision 1.67
diff -u -d -d -p -r1.67 exec-all.h
--- exec-all.h	8 Oct 2007 13:16:14 -0000	1.67
+++ exec-all.h	12 Oct 2007 07:14:43 -0000
@@ -562,6 +567,7 @@ extern int tb_invalidated_flag;
 #define MEMSUFFIX _code
 #define env cpu_single_env
 
+/* native-endian */
 #define DATA_SIZE 1
 #include "softmmu_header.h"
 
@@ -581,6 +587,21 @@ void tlb_fill(target_ulong addr, int is_
 #define DATA_SIZE 8
 #include "softmmu_header.h"
 
+/* reverse-endian */
+#define REVERSE_ENDIAN
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+#undef REVERSE_ENDIAN
+
 #undef ACCESS_TYPE
 #undef MEMSUFFIX
 #undef env
Index: exec.c
===================================================================
RCS file: /sources/qemu/qemu/exec.c,v
retrieving revision 1.108
diff -u -d -d -p -r1.108 exec.c
--- exec.c	8 Oct 2007 13:16:14 -0000	1.108
+++ exec.c	12 Oct 2007 07:14:43 -0000
@@ -2507,7 +2507,7 @@ void cpu_physical_memory_rw(target_phys_
     uint8_t *ptr;
     uint32_t val;
     target_phys_addr_t page;
-    unsigned long pd;
+    unsigned long pd, addr1;
     PhysPageDesc *p;
 
     while (len > 0) {
@@ -2524,31 +2524,54 @@ void cpu_physical_memory_rw(target_phys_
 
         if (is_write) {
             if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
-                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
-                /* XXX: could force cpu_single_env to NULL to avoid
-                   potential bugs */
-                if (l >= 4 && ((addr & 3) == 0)) {
-                    /* 32 bit write access */
-                    val = ldl_p(buf);
-                    io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
-                    l = 4;
-                } else if (l >= 2 && ((addr & 1) == 0)) {
-                    /* 16 bit write access */
-                    val = lduw_p(buf);
-                    io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
-                    l = 2;
+                if (pd & IO_MEM_REVERSE) {
+                    /* Specific case for reverse endian page write */
+                    addr1 = (pd & TARGET_PAGE_MASK) +
+                        (addr & ~TARGET_PAGE_MASK);
+                    ptr = phys_ram_base + addr1;
+                    for (; l >= 4; l -= 4) {
+                        stlr_p(ptr, *(uint32_t *)buf);
+                        ptr += 4;
+                        buf += 4;
+                    }
+                    for (; l >= 2; l -= 2) {
+                        stwr_p(ptr, *(uint16_t *)buf);
+                        ptr += 2;
+                        buf += 2;
+                    }
+                    if (l >= 1)
+                        *ptr = *buf;
+                    goto invalidate_code;
                 } else {
-                    /* 8 bit write access */
-                    val = ldub_p(buf);
-                    io_mem_write[io_index][0](io_mem_opaque[io_index], addr, val);
-                    l = 1;
+                    io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+                    /* XXX: could force cpu_single_env to NULL to avoid
+                       potential bugs */
+                    if (l >= 4 && ((addr & 3) == 0)) {
+                        /* 32 bit write access */
+                        val = ldl_p(buf);
+                        io_mem_write[io_index][2](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 4;
+                    } else if (l >= 2 && ((addr & 1) == 0)) {
+                        /* 16 bit write access */
+                        val = lduw_p(buf);
+                        io_mem_write[io_index][1](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 2;
+                    } else {
+                        /* 8 bit write access */
+                        val = ldub_p(buf);
+                        io_mem_write[io_index][0](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 1;
+                    }
                 }
             } else {
-                unsigned long addr1;
                 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
                 /* RAM case */
                 ptr = phys_ram_base + addr1;
                 memcpy(ptr, buf, l);
+            invalidate_code:
                 if (!cpu_physical_memory_is_dirty(addr1)) {
                     /* invalidate code */
                     tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
@@ -2560,23 +2583,45 @@ void cpu_physical_memory_rw(target_phys_
         } else {
             if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
                 !(pd & IO_MEM_ROMD)) {
-                /* I/O case */
-                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
-                if (l >= 4 && ((addr & 3) == 0)) {
-                    /* 32 bit read access */
-                    val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
-                    stl_p(buf, val);
-                    l = 4;
-                } else if (l >= 2 && ((addr & 1) == 0)) {
-                    /* 16 bit read access */
-                    val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
-                    stw_p(buf, val);
-                    l = 2;
+                if (pd & IO_MEM_REVERSE) {
+                    /* Specific case for reverse endian page write */
+                    addr1 = (pd & TARGET_PAGE_MASK) +
+                        (addr & ~TARGET_PAGE_MASK);
+                    ptr = phys_ram_base + addr1;
+                    for (; l >= 4; l -= 4) {
+                        *(uint32_t *)buf = ldlr_p(ptr);
+                        ptr += 4;
+                        buf += 4;
+                    }
+                    for (; l >= 2; l -= 2) {
+                        *(uint16_t *)buf = lduwr_p(ptr);
+                        ptr += 2;
+                        buf += 2;
+                    }
+                    if (l >= 1)
+                        *buf = *ptr;
                 } else {
-                    /* 8 bit read access */
-                    val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr);
-                    stb_p(buf, val);
-                    l = 1;
+                    /* I/O case */
+                    io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+                    if (l >= 4 && ((addr & 3) == 0)) {
+                        /* 32 bit read access */
+                        val = io_mem_read[io_index][2](io_mem_opaque[io_index],
+                                                       addr);
+                        stl_p(buf, val);
+                        l = 4;
+                    } else if (l >= 2 && ((addr & 1) == 0)) {
+                        /* 16 bit read access */
+                        val = io_mem_read[io_index][1](io_mem_opaque[io_index],
+                                                       addr);
+                        stw_p(buf, val);
+                        l = 2;
+                    } else {
+                        /* 8 bit read access */
+                        val = io_mem_read[io_index][0](io_mem_opaque[io_index],
+                                                       addr);
+                        stb_p(buf, val);
+                        l = 1;
+                    }
                 }
             } else {
                 /* RAM case */
@@ -2907,6 +2952,21 @@ void dump_exec_info(FILE *f,
 #define env cpu_single_env
 #define SOFTMMU_CODE_ACCESS
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -2918,6 +2978,7 @@ void dump_exec_info(FILE *f,
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 #undef env
 
Index: softmmu_header.h
===================================================================
RCS file: /sources/qemu/qemu/softmmu_header.h,v
retrieving revision 1.17
diff -u -d -d -p -r1.17 softmmu_header.h
--- softmmu_header.h	8 Oct 2007 13:16:14 -0000	1.17
+++ softmmu_header.h	12 Oct 2007 07:14:43 -0000
@@ -17,6 +17,9 @@
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
+
+#if !defined(REVERSE_ENDIAN)
+/* native-endian */
 #if DATA_SIZE == 8
 #define SUFFIX q
 #define USUFFIX q
@@ -38,7 +41,31 @@
 #else
 #error unsupported data size
 #endif
+#else /* !defined(REVERSE_ENDIAN) */
+/* reverse-endian */
+#if DATA_SIZE == 8
+#define SUFFIX qr
+#define USUFFIX qr
+#define DATA_TYPE uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX lr
+#define USUFFIX lr
+#define DATA_TYPE uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX wr
+#define USUFFIX uwr
+#define DATA_TYPE uint16_t
+#define DATA_STYPE int16_t
+#elif DATA_SIZE == 1
+#define SUFFIX br
+#define USUFFIX ubr
+#define DATA_TYPE uint8_t
+#define DATA_STYPE int8_t
+#else
+#error unsupported data size
+#endif
+#endif /* defined(REVERSE_ENDIAN) */
 
 #if ACCESS_TYPE == 0
 
 #define CPU_MEM_INDEX 0
@@ -322,7 +302,8 @@ static inline void glue(glue(st, SUFFIX)
 #endif /* !asm */
 
 #if ACCESS_TYPE != 3
 
+#if !defined(REVERSE_ENDIAN)
 #if DATA_SIZE == 8
 static inline float64 glue(ldfq, MEMSUFFIX)(target_ulong ptr)
 {
@@ -386,6 +367,54 @@ static inline void glue(stfl, MEMSUFFIX)
 }
 #endif /* DATA_SIZE == 4 */
 
+#else /* defined(REVERSE_ENDIAN) */
+
+#if DATA_SIZE == 8
+static inline float64 glue(ldfqr, MEMSUFFIX)(target_ulong ptr)
+{
+    union {
+        float64 d;
+        uint64_t i;
+    } u;
+    u.i = glue(ldqr, MEMSUFFIX)(ptr);
+    return u.d;
+}
+
+static inline void glue(stfqr, MEMSUFFIX)(target_ulong ptr, float64 v)
+{
+    union {
+        float64 d;
+        uint64_t i;
+    } u;
+    u.d = v;
+    glue(stqr, MEMSUFFIX)(ptr, u.i);
+}
+#endif /* DATA_SIZE == 8 */
+
+#if DATA_SIZE == 4
+static inline float32 glue(ldflr, MEMSUFFIX)(target_ulong ptr)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.i = glue(ldlr, MEMSUFFIX)(ptr);
+    return u.f;
+}
+
+static inline void glue(stflr, MEMSUFFIX)(target_ulong ptr, float32 v)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.f = v;
+    glue(stlr, MEMSUFFIX)(ptr, u.i);
+}
+#endif /* DATA_SIZE == 4 */
+
+#endif /* defined(REVERSE_ENDIAN) */
+
 #endif /* ACCESS_TYPE != 3 */
 
 #undef RES_TYPE
Index: softmmu_template.h
===================================================================
RCS file: /sources/qemu/qemu/softmmu_template.h,v
retrieving revision 1.18
diff -u -d -d -p -r1.18 softmmu_template.h
--- softmmu_template.h	17 Sep 2007 08:09:45 -0000	1.18
+++ softmmu_template.h	12 Oct 2007 07:14:43 -0000
@@ -19,25 +19,66 @@
  */
 #define DATA_SIZE (1 << SHIFT)
 
+#if !defined(REVERSE_ENDIAN)
+/* native-endian */
 #if DATA_SIZE == 8
 #define SUFFIX q
 #define USUFFIX q
+#define RSUFFIX qr
+#define URSUFFIX qr
 #define DATA_TYPE uint64_t
 #elif DATA_SIZE == 4
 #define SUFFIX l
 #define USUFFIX l
+#define RSUFFIX lr
+#define URSUFFIX lr
 #define DATA_TYPE uint32_t
 #elif DATA_SIZE == 2
 #define SUFFIX w
 #define USUFFIX uw
+#define RSUFFIX wr
+#define URSUFFIX uwr
 #define DATA_TYPE uint16_t
 #elif DATA_SIZE == 1
 #define SUFFIX b
 #define USUFFIX ub
+#define RSUFFIX br
+#define URSUFFIX ubr
+#define DATA_TYPE uint8_t
+#else
+#error unsupported data size
+#endif
+#else /* !defined(REVERSE_ENDIAN) */
+/* reverse-endian */
+#if DATA_SIZE == 8
+#define SUFFIX qr
+#define USUFFIX qr
+#define RSUFFIX q
+#define URSUFFIX q
+#define DATA_TYPE uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX lr
+#define USUFFIX lr
+#define RSUFFIX l
+#define URSUFFIX l
+#define DATA_TYPE uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX wr
+#define USUFFIX uwr
+#define RSUFFIX w
+#define URSUFFIX uw
+#define DATA_TYPE uint16_t
+#elif DATA_SIZE == 1
+#define SUFFIX br
+#define USUFFIX ubr
+#define RSUFFIX b
+#define URSUFFIX ub
 #define DATA_TYPE uint8_t
 #else
 #error unsupported data size
 #endif
+#endif /* defined(REVERSE_ENDIAN) */
+
 
 #ifdef SOFTMMU_CODE_ACCESS
 #define READ_ACCESS_TYPE 2
@@ -47,6 +88,24 @@
 #define ADDR_READ addr_read
 #endif
 
+#if (defined(TARGET_WORDS_BIGENDIAN) && !defined(REVERSE_ENDIAN)) || \
+    (!defined(TARGET_WORDS_BIGENDIAN) && defined(REVERSE_ENDIAN))
+#define ACCESS_WORDS_BIGENDIAN
+#endif
+
+/* Beware: we do not have reverse-endian accessors for IOs */
+#if defined(REVERSE_ENDIAN)
+#if SHIFT == 1
+#define IOSWAP(val) bswap16(val)
+#elif SHIFT >= 2
+#define IOSWAP(val) bswap32(val)
+#else
+#define IOSWAP(val) (val)
+#endif
+#else
+#define IOSWAP(val) (val)
+#endif
+
 static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
                                                         int is_user,
                                                         void *retaddr);
@@ -59,13 +118,16 @@ static inline DATA_TYPE glue(io_read, SU
     index = (tlb_addr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
 #if SHIFT <= 2
     res = io_mem_read[index][SHIFT](io_mem_opaque[index], physaddr);
+    res = IOSWAP(res);
 #else
-#ifdef TARGET_WORDS_BIGENDIAN
-    res = (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr) << 32;
-    res |= io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+#ifdef ACCESS_WORDS_BIGENDIAN
+    res = (uint64_t)IOSWAP(io_mem_read[index][2](io_mem_opaque[index],
+                                                 physaddr)) << 32;
+    res |= IOSWAP(io_mem_read[index][2](io_mem_opaque[index], physaddr + 4));
 #else
-    res = io_mem_read[index][2](io_mem_opaque[index], physaddr);
-    res |= (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr + 4) << 32;
+    res = IOSWAP(io_mem_read[index][2](io_mem_opaque[index], physaddr));
+    res |= (uint64_t)IOSWAP(io_mem_read[index][2](io_mem_opaque[index],
+                                                  physaddr + 4)) << 32;
 #endif
 #endif /* SHIFT > 2 */
 #ifdef USE_KQEMU
@@ -88,9 +150,33 @@ DATA_TYPE REGPARM(1) glue(glue(__ld, SUF
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[is_user][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                /* Specific case for reverse endian page read */
+                if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                    TARGET_PAGE_SIZE) {
+                    /* slow unaligned access (it spans two pages or IO) */
+                    retaddr = GETPC();
+#ifdef ALIGNED_ONLY
+                    do_unaligned_access(addr, READ_ACCESS_TYPE,
+                                        is_user, retaddr);
+#endif
+                    res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr, is_user,
+                                                                 retaddr);
+                } else {
+#ifdef ALIGNED_ONLY
+                    if ((addr & (DATA_SIZE - 1)) != 0) {
+                        retaddr = GETPC();
+                        do_unaligned_access(addr, READ_ACCESS_TYPE,
+                                            is_user, retaddr);
+                    }
+#endif
+                    res = glue(glue(ld, URSUFFIX), _raw)((uint8_t *)(long)physaddr);
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
             /* slow unaligned access (it spans two pages or IO) */
@@ -140,9 +226,37 @@ static DATA_TYPE glue(glue(slow_ld, SUFF
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[is_user][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                /* Specific case for reverse endian page write */
+                if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                    TARGET_PAGE_SIZE) {
+                    /* slow unaligned access (it spans two pages) */
+                    addr1 = addr & ~(DATA_SIZE - 1);
+                    addr2 = addr1 + DATA_SIZE;
+                    res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr1,
+                                                                  is_user,
+                                                                  retaddr);
+                    res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
+                                                                  is_user,
+                                                                  retaddr);
+                    shift = (addr & (DATA_SIZE - 1)) * 8;
+#ifdef ACCESS_WORDS_BIGENDIAN
+                    res = (res1 >> shift) |
+                        (res2 << ((DATA_SIZE * 8) - shift));
+#else
+                    res = (res1 << shift) |
+                        (res2 >> ((DATA_SIZE * 8) - shift));
+#endif
+                    res = (DATA_TYPE)res;
+                } else {
+                    /* unaligned/aligned access in the same page */
+                    res = glue(glue(ld, URSUFFIX), _raw)((uint8_t *)(long)physaddr);
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
@@ -194,14 +308,16 @@ static inline void glue(io_write, SUFFIX
     env->mem_write_vaddr = tlb_addr;
     env->mem_write_pc = (unsigned long)retaddr;
 #if SHIFT <= 2
+    val = IOSWAP(val);
     io_mem_write[index][SHIFT](io_mem_opaque[index], physaddr, val);
 #else
-#ifdef TARGET_WORDS_BIGENDIAN
-    io_mem_write[index][2](io_mem_opaque[index], physaddr, val >> 32);
-    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val);
+#ifdef ACCESS_WORDS_BIGENDIAN
+    io_mem_write[index][2](io_mem_opaque[index], physaddr, IOSWAP(val >> 32));
+    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, IOSWAP(val));
 #else
-    io_mem_write[index][2](io_mem_opaque[index], physaddr, val);
-    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val >> 32);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr, IOSWAP(val));
+    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4,
+                           IOSWAP(val >> 32));
 #endif
 #endif /* SHIFT > 2 */
 #ifdef USE_KQEMU
@@ -220,12 +336,36 @@ void REGPARM(2) glue(glue(__st, SUFFIX),
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[is_user][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            retaddr = GETPC();
-            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                /* Specific case for reverse endian page read */
+                if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                    TARGET_PAGE_SIZE) {
+                    /* slow unaligned access (it spans two pages or IO) */
+                    retaddr = GETPC();
+#ifdef ALIGNED_ONLY
+                    do_unaligned_access(addr, 1, is_user, retaddr);
+#endif
+                    glue(glue(slow_st, SUFFIX), MMUSUFFIX)(addr, val,
+                                                           is_user, retaddr);
+                } else {
+#ifdef ALIGNED_ONLY
+                    if ((addr & (DATA_SIZE - 1)) != 0) {
+                        retaddr = GETPC();
+                        do_unaligned_access(addr, 1, is_user, retaddr);
+                    }
+#endif
+                    glue(glue(st, RSUFFIX), _raw)((uint8_t *)(long)physaddr,
+                                                  val);
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                retaddr = GETPC();
+                glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
+            /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
@@ -271,15 +411,39 @@ static void glue(glue(slow_st, SUFFIX), 
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[is_user][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                /* Specific case for reverse endian page read */
+                if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                    TARGET_PAGE_SIZE) {
+                    /* slow unaligned access (it spans two pages or IO) */
+                    /* XXX: not efficient, but simple */
+                    for(i = 0;i < DATA_SIZE; i++) {
+#ifdef ACCESS_WORDS_BIGENDIAN
+                        glue(slow_stb, MMUSUFFIX)(addr + i, val >> (i * 8),
+                                                  is_user, retaddr);
+#else
+                        glue(slow_stb, MMUSUFFIX)(addr + i,
+                                                  val >> (((DATA_SIZE - 1) * 8)
+                                                          - (i * 8)),
+                                                  is_user, retaddr);
+#endif
+                    }
+                } else {
+                    /* aligned/unaligned access in the same page */
+                    glue(glue(st, RSUFFIX), _raw)((uint8_t *)(long)physaddr,
+                                                  val);
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* XXX: not efficient, but simple */
             for(i = 0;i < DATA_SIZE; i++) {
-#ifdef TARGET_WORDS_BIGENDIAN
+#ifdef ACCESS_WORDS_BIGENDIAN
                 glue(slow_stb, MMUSUFFIX)(addr + i, val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
                                           is_user, retaddr);
 #else
@@ -297,10 +461,14 @@ static void glue(glue(slow_st, SUFFIX), 
 
 #endif /* !defined(SOFTMMU_CODE_ACCESS) */
 
+#undef IOSWAP
+#undef ACCESS_WORDS_BIGENDIAN
 #undef READ_ACCESS_TYPE
 #undef SHIFT
 #undef DATA_TYPE
 #undef SUFFIX
 #undef USUFFIX
+#undef RSUFFIX
+#undef URSUFFIX
 #undef DATA_SIZE
 #undef ADDR_READ
Index: target-alpha/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/exec.h,v
retrieving revision 1.3
diff -u -d -d -p -r1.3 exec.h
--- target-alpha/exec.h	16 Sep 2007 21:08:01 -0000	1.3
+++ target-alpha/exec.h	12 Oct 2007 07:14:46 -0000
@@ -62,6 +62,9 @@ register uint64_t T2 asm(AREG3);
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 static inline void env_to_regs(void)
Index: target-alpha/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/op_helper.c,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 op_helper.c
--- target-alpha/op_helper.c	16 Sep 2007 21:08:01 -0000	1.2
+++ target-alpha/op_helper.c	12 Oct 2007 07:14:46 -0000
@@ -1207,6 +1207,21 @@ void helper_st_phys_to_virt (void)
 
 #define MMUSUFFIX _mmu
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -1218,6 +1233,7 @@ void helper_st_phys_to_virt (void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-arm/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-arm/exec.h,v
retrieving revision 1.13
diff -u -d -d -p -r1.13 exec.h
--- target-arm/exec.h	16 Sep 2007 21:08:01 -0000	1.13
+++ target-arm/exec.h	12 Oct 2007 07:14:47 -0000
@@ -64,6 +64,9 @@ static inline int cpu_halted(CPUState *e
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 /* In op_helper.c */
Index: target-arm/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-arm/op_helper.c,v
retrieving revision 1.6
diff -u -d -d -p -r1.6 op_helper.c
--- target-arm/op_helper.c	16 Sep 2007 21:08:02 -0000	1.6
+++ target-arm/op_helper.c	12 Oct 2007 07:14:47 -0000
@@ -180,6 +180,21 @@ void do_vfp_get_fpscr(void)
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -191,6 +206,7 @@ void do_vfp_get_fpscr(void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-cris/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-cris/exec.h,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 exec.h
--- target-cris/exec.h	8 Oct 2007 13:04:02 -0000	1.1
+++ target-cris/exec.h	12 Oct 2007 07:14:47 -0000
@@ -45,6 +45,9 @@ static inline void regs_to_env(void)
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 void cpu_cris_flush_flags(CPUCRISState *env, int cc_op);
Index: target-cris/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-cris/op_helper.c,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 op_helper.c
--- target-cris/op_helper.c	8 Oct 2007 13:04:02 -0000	1.1
+++ target-cris/op_helper.c	12 Oct 2007 07:14:47 -0000
@@ -25,6 +25,21 @@
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -36,6 +51,7 @@
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-i386/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-i386/exec.h,v
retrieving revision 1.37
diff -u -d -d -p -r1.37 exec.h
--- target-i386/exec.h	23 Sep 2007 15:28:04 -0000	1.37
+++ target-i386/exec.h	12 Oct 2007 07:14:47 -0000
@@ -217,6 +217,9 @@ void check_iol_DX(void);
 #if !defined(CONFIG_USER_ONLY)
 
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 
 static inline double ldfq(target_ulong ptr)
 {
Index: target-i386/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/helper.c,v
retrieving revision 1.89
diff -u -d -d -p -r1.89 helper.c
--- target-i386/helper.c	27 Sep 2007 01:52:00 -0000	1.89
+++ target-i386/helper.c	12 Oct 2007 07:14:47 -0000
@@ -3867,6 +3867,21 @@ void update_fp_status(void)
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -3878,6 +3893,7 @@ void update_fp_status(void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 #endif
 
Index: target-m68k/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-m68k/exec.h,v
retrieving revision 1.4
diff -u -d -d -p -r1.4 exec.h
--- target-m68k/exec.h	16 Sep 2007 21:08:03 -0000	1.4
+++ target-m68k/exec.h	12 Oct 2007 07:14:47 -0000
@@ -38,6 +38,9 @@ static inline void regs_to_env(void)
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 void cpu_m68k_flush_flags(CPUM68KState *env, int cc_op);
Index: target-m68k/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-m68k/op_helper.c,v
retrieving revision 1.6
diff -u -d -d -p -r1.6 op_helper.c
--- target-m68k/op_helper.c	16 Sep 2007 21:08:03 -0000	1.6
+++ target-m68k/op_helper.c	12 Oct 2007 07:14:47 -0000
@@ -33,6 +33,21 @@ extern int semihosting_enabled;
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -44,6 +59,7 @@ extern int semihosting_enabled;
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-mips/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-mips/exec.h,v
retrieving revision 1.38
diff -u -d -d -p -r1.38 exec.h
--- target-mips/exec.h	9 Oct 2007 03:39:58 -0000	1.38
+++ target-mips/exec.h	12 Oct 2007 07:14:48 -0000
@@ -54,6 +54,9 @@ register target_ulong T2 asm(AREG3);
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 #if defined(TARGET_MIPSN32) || defined(TARGET_MIPS64)
Index: target-mips/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-mips/op_helper.c,v
retrieving revision 1.65
diff -u -d -d -p -r1.65 op_helper.c
--- target-mips/op_helper.c	9 Oct 2007 03:39:58 -0000	1.65
+++ target-mips/op_helper.c	12 Oct 2007 07:14:48 -0000
@@ -544,6 +544,21 @@ static void do_unaligned_access (target_
 #define MMUSUFFIX _mmu
 #define ALIGNED_ONLY
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -555,6 +570,7 @@ static void do_unaligned_access (target_
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 static void do_unaligned_access (target_ulong addr, int is_write, int is_user, void *retaddr)
 {
Index: target-ppc/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/exec.h,v
retrieving revision 1.28
diff -u -d -d -p -r1.28 exec.h
--- target-ppc/exec.h	7 Oct 2007 18:19:25 -0000	1.28
+++ target-ppc/exec.h	12 Oct 2007 07:14:48 -0000
@@ -91,7 +91,12 @@ static always_inline target_ulong rotl64
 #endif
 
 #if !defined(CONFIG_USER_ONLY)
+
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
+
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 void do_raise_exception_err (uint32_t exception, int error_code);
Index: target-ppc/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_helper.c,v
retrieving revision 1.49
diff -u -d -d -p -r1.49 op_helper.c
--- target-ppc/op_helper.c	7 Oct 2007 17:13:43 -0000	1.49
+++ target-ppc/op_helper.c	12 Oct 2007 07:14:49 -0000
@@ -2291,6 +2301,7 @@ DO_SPE_OP1(fsctuf);
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -2303,6 +2314,21 @@ DO_SPE_OP1(fsctuf);
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
Index: target-ppc/op_helper.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_helper.h,v
retrieving revision 1.21
diff -u -d -d -p -r1.21 op_helper.h
--- target-ppc/op_helper.h	7 Oct 2007 17:13:44 -0000	1.21
+++ target-ppc/op_helper.h	12 Oct 2007 07:14:49 -0000
@@ -37,19 +37,6 @@ void glue(do_POWER2_lfq_le, MEMSUFFIX) (
 void glue(do_POWER2_stfq, MEMSUFFIX) (void);
 void glue(do_POWER2_stfq_le, MEMSUFFIX) (void);
 
-#if defined(TARGET_PPC64)
-void glue(do_lsw_64, MEMSUFFIX) (int dst);
-void glue(do_lsw_le_64, MEMSUFFIX) (int dst);
-void glue(do_stsw_64, MEMSUFFIX) (int src);
-void glue(do_stsw_le_64, MEMSUFFIX) (int src);
-void glue(do_lmw_64, MEMSUFFIX) (int dst);
-void glue(do_lmw_le_64, MEMSUFFIX) (int dst);
-void glue(do_stmw_64, MEMSUFFIX) (int src);
-void glue(do_stmw_le_64, MEMSUFFIX) (int src);
-void glue(do_icbi_64, MEMSUFFIX) (void);
-void glue(do_dcbz_64, MEMSUFFIX) (void);
-#endif
-
 #else
 
 void do_print_mem_EA (target_ulong EA);
Index: target-ppc/op_helper_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_helper_mem.h,v
retrieving revision 1.14
diff -u -d -d -p -r1.14 op_helper_mem.h
--- target-ppc/op_helper_mem.h	7 Oct 2007 17:13:44 -0000	1.14
+++ target-ppc/op_helper_mem.h	12 Oct 2007 07:14:49 -0000
@@ -19,85 +19,33 @@
  */
 
 /* Multiple word / string load and store */
-static always_inline target_ulong glue(ld32r, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF000000UL) >> 24) | ((tmp & 0x00FF0000UL) >> 8) |
-        ((tmp & 0x0000FF00UL) << 8) | ((tmp & 0x000000FFUL) << 24);
-}
-
-static always_inline void glue(st32r, MEMSUFFIX) (target_ulong EA,
-                                                  target_ulong data)
-{
-    uint32_t tmp =
-        ((data & 0xFF000000UL) >> 24) | ((data & 0x00FF0000UL) >> 8) |
-        ((data & 0x0000FF00UL) << 8) | ((data & 0x000000FFUL) << 24);
-    glue(stl, MEMSUFFIX)(EA, tmp);
-}
-
 void glue(do_lmw, MEMSUFFIX) (int dst)
 {
     for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lmw_64, MEMSUFFIX) (int dst)
-{
-    for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst] = glue(ldl, MEMSUFFIX)(T0);
     }
 }
-#endif
 
 void glue(do_stmw, MEMSUFFIX) (int src)
 {
     for (; src < 32; src++, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint32_t)T0, env->gpr[src]);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stmw_64, MEMSUFFIX) (int src)
-{
-    for (; src < 32; src++, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint64_t)T0, env->gpr[src]);
+        glue(stl, MEMSUFFIX)(T0, env->gpr[src]);
     }
 }
-#endif
 
 void glue(do_lmw_le, MEMSUFFIX) (int dst)
 {
     for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lmw_le_64, MEMSUFFIX) (int dst)
-{
-    for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst] = glue(ldlr, MEMSUFFIX)(T0);
     }
 }
-#endif
 
 void glue(do_stmw_le, MEMSUFFIX) (int src)
 {
     for (; src < 32; src++, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint32_t)T0, env->gpr[src]);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stmw_le_64, MEMSUFFIX) (int src)
-{
-    for (; src < 32; src++, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint64_t)T0, env->gpr[src]);
+        glue(stlr, MEMSUFFIX)(T0, env->gpr[src]);
     }
 }
-#endif
 
 void glue(do_lsw, MEMSUFFIX) (int dst)
 {
@@ -105,71 +53,33 @@ void glue(do_lsw, MEMSUFFIX) (int dst)
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-        if (unlikely(dst == 32))
-            dst = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        tmp = 0;
-        for (sh = 24; T1 > 0; T1--, T0++, sh -= 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint32_t)T0) << sh;
-        }
-        env->gpr[dst] = tmp;
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lsw_64, MEMSUFFIX) (int dst)
-{
-    uint32_t tmp;
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst++] = glue(ldl, MEMSUFFIX)(T0);
         if (unlikely(dst == 32))
             dst = 0;
     }
     if (unlikely(T1 != 0)) {
         tmp = 0;
         for (sh = 24; T1 > 0; T1--, T0++, sh -= 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint64_t)T0) << sh;
+            tmp |= glue(ldub, MEMSUFFIX)(T0) << sh;
         }
         env->gpr[dst] = tmp;
     }
 }
-#endif
 
 void glue(do_stsw, MEMSUFFIX) (int src)
 {
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint32_t)T0, env->gpr[src++]);
-        if (unlikely(src == 32))
-            src = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        for (sh = 24; T1 > 0; T1--, T0++, sh -= 8)
-            glue(stb, MEMSUFFIX)((uint32_t)T0, (env->gpr[src] >> sh) & 0xFF);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stsw_64, MEMSUFFIX) (int src)
-{
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint64_t)T0, env->gpr[src++]);
+        glue(stl, MEMSUFFIX)(T0, env->gpr[src++]);
         if (unlikely(src == 32))
             src = 0;
     }
     if (unlikely(T1 != 0)) {
         for (sh = 24; T1 > 0; T1--, T0++, sh -= 8)
-            glue(stb, MEMSUFFIX)((uint64_t)T0, (env->gpr[src] >> sh) & 0xFF);
+            glue(stb, MEMSUFFIX)(T0, (env->gpr[src] >> sh) & 0xFF);
     }
 }
-#endif
 
 void glue(do_lsw_le, MEMSUFFIX) (int dst)
 {
@@ -177,71 +87,33 @@ void glue(do_lsw_le, MEMSUFFIX) (int dst
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
-        if (unlikely(dst == 32))
-            dst = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        tmp = 0;
-        for (sh = 0; T1 > 0; T1--, T0++, sh += 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint32_t)T0) << sh;
-        }
-        env->gpr[dst] = tmp;
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lsw_le_64, MEMSUFFIX) (int dst)
-{
-    uint32_t tmp;
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst++] = glue(ldlr, MEMSUFFIX)(T0);
         if (unlikely(dst == 32))
             dst = 0;
     }
     if (unlikely(T1 != 0)) {
         tmp = 0;
         for (sh = 0; T1 > 0; T1--, T0++, sh += 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint64_t)T0) << sh;
+            tmp |= glue(ldub, MEMSUFFIX)(T0) << sh;
         }
         env->gpr[dst] = tmp;
     }
 }
-#endif
 
 void glue(do_stsw_le, MEMSUFFIX) (int src)
 {
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint32_t)T0, env->gpr[src++]);
-        if (unlikely(src == 32))
-            src = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        for (sh = 0; T1 > 0; T1--, T0++, sh += 8)
-            glue(stb, MEMSUFFIX)((uint32_t)T0, (env->gpr[src] >> sh) & 0xFF);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stsw_le_64, MEMSUFFIX) (int src)
-{
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint64_t)T0, env->gpr[src++]);
+        glue(stlr, MEMSUFFIX)(T0, env->gpr[src++]);
         if (unlikely(src == 32))
             src = 0;
     }
     if (unlikely(T1 != 0)) {
         for (sh = 0; T1 > 0; T1--, T0++, sh += 8)
-            glue(stb, MEMSUFFIX)((uint64_t)T0, (env->gpr[src] >> sh) & 0xFF);
+            glue(stb, MEMSUFFIX)(T0, (env->gpr[src] >> sh) & 0xFF);
     }
 }
-#endif
 
 /* Instruction cache invalidation helper */
 void glue(do_icbi, MEMSUFFIX) (void)
@@ -252,27 +124,11 @@ void glue(do_icbi, MEMSUFFIX) (void)
      * (not a fetch) by the MMU. To be sure it will be so,
      * do the load "by hand".
      */
-    tmp = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-    T0 &= ~(env->icache_line_size - 1);
-    tb_invalidate_page_range((uint32_t)T0,
-                             (uint32_t)(T0 + env->icache_line_size));
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_icbi_64, MEMSUFFIX) (void)
-{
-    uint64_t tmp;
-    /* Invalidate one cache line :
-     * PowerPC specification says this is to be treated like a load
-     * (not a fetch) by the MMU. To be sure it will be so,
-     * do the load "by hand".
-     */
-    tmp = glue(ldq, MEMSUFFIX)((uint64_t)T0);
+    tmp = glue(ldl, MEMSUFFIX)(T0);
     T0 &= ~(env->icache_line_size - 1);
-    tb_invalidate_page_range((uint64_t)T0,
-                             (uint64_t)(T0 + env->icache_line_size));
+    /* We assume this does not wrap around 2^32 on 32-bit targets */
+    tb_invalidate_page_range(T0, T0 + env->icache_line_size);
 }
-#endif
 
 void glue(do_dcbz, MEMSUFFIX) (void)
 {
@@ -281,90 +137,43 @@ void glue(do_dcbz, MEMSUFFIX) (void)
     /* XXX: should be 970 specific (?) */
     if (((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1)
         dcache_line_size = 32;
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x00), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x04), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x08), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x0C), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x10), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x14), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x18), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x1C), 0);
-    if (dcache_line_size >= 64) {
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x20UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x24UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x28UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x2CUL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x30UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x34UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x38UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x3CUL), 0);
-        if (dcache_line_size >= 128) {
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x40UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x44UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x48UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x4CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x50UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x54UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x58UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x5CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x60UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x64UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x68UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x6CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x70UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x74UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x78UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x7CUL), 0);
-        }
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_dcbz_64, MEMSUFFIX) (void)
-{
-    int dcache_line_size = env->dcache_line_size;
-
-    /* XXX: should be 970 specific (?) */
-    if (((env->spr[SPR_970_HID5] >> 6) & 0x3) == 0x2)
-        dcache_line_size = 32;
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x00), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x04), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x08), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x0C), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x10), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x14), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x18), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x1C), 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x00, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x04, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x08, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x0C, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x10, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x14, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x18, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x1C, 0);
     if (dcache_line_size >= 64) {
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x20UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x24UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x28UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x2CUL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x30UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x34UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x38UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x3CUL), 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x20UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x24UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x28UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x2CUL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x30UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x34UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x38UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x3CUL, 0);
         if (dcache_line_size >= 128) {
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x40UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x44UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x48UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x4CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x50UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x54UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x58UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x5CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x60UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x64UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x68UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x6CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x70UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x74UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x78UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x7CUL), 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x40UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x44UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x48UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x4CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x50UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x54UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x58UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x5CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x60UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x64UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x68UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x6CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x70UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x74UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x78UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x7CUL, 0);
         }
     }
 }
-#endif
 
 /* PowerPC 601 specific instructions (POWER bridge) */
 // XXX: to be tested
@@ -400,26 +209,6 @@ void glue(do_POWER2_lfq, MEMSUFFIX) (voi
     FT1 = glue(ldfq, MEMSUFFIX)((uint32_t)(T0 + 4));
 }
 
-static always_inline double glue(ldfqr, MEMSUFFIX) (target_ulong EA)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = glue(ldfq, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-
-    return u.d;
-}
-
 void glue(do_POWER2_lfq_le, MEMSUFFIX) (void)
 {
     FT0 = glue(ldfqr, MEMSUFFIX)((uint32_t)(T0 + 4));
@@ -432,25 +221,6 @@ void glue(do_POWER2_stfq, MEMSUFFIX) (vo
     glue(stfq, MEMSUFFIX)((uint32_t)(T0 + 4), FT1);
 }
 
-static always_inline void glue(stfqr, MEMSUFFIX) (target_ulong EA, double d)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = d;
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-    glue(stfq, MEMSUFFIX)(EA, u.d);
-}
-
 void glue(do_POWER2_stfq_le, MEMSUFFIX) (void)
 {
     glue(stfqr, MEMSUFFIX)((uint32_t)(T0 + 4), FT0);
Index: target-ppc/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_mem.h,v
retrieving revision 1.22
diff -u -d -d -p -r1.22 op_mem.h
--- target-ppc/op_mem.h	7 Oct 2007 18:19:25 -0000	1.22
+++ target-ppc/op_mem.h	12 Oct 2007 07:14:49 -0000
@@ -18,82 +18,15 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-static always_inline uint16_t glue(ld16r, MEMSUFFIX) (target_ulong EA)
-{
-    uint16_t tmp = glue(lduw, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF00) >> 8) | ((tmp & 0x00FF) << 8);
-}
-
-static always_inline int32_t glue(ld16rs, MEMSUFFIX) (target_ulong EA)
-{
-    int16_t tmp = glue(lduw, MEMSUFFIX)(EA);
-    return (int16_t)((tmp & 0xFF00) >> 8) | ((tmp & 0x00FF) << 8);
-}
-
-static always_inline uint32_t glue(ld32r, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF000000) >> 24) | ((tmp & 0x00FF0000) >> 8) |
-        ((tmp & 0x0000FF00) << 8) | ((tmp & 0x000000FF) << 24);
-}
-
-#if defined(TARGET_PPC64) || defined(TARGET_PPCEMB)
-static always_inline uint64_t glue(ld64r, MEMSUFFIX) (target_ulong EA)
-{
-    uint64_t tmp = glue(ldq, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF00000000000000ULL) >> 56) |
-        ((tmp & 0x00FF000000000000ULL) >> 40) |
-        ((tmp & 0x0000FF0000000000ULL) >> 24) |
-        ((tmp & 0x000000FF00000000ULL) >> 8) |
-        ((tmp & 0x00000000FF000000ULL) << 8) |
-        ((tmp & 0x0000000000FF0000ULL) << 24) |
-        ((tmp & 0x000000000000FF00ULL) << 40) |
-        ((tmp & 0x00000000000000FFULL) << 54);
-}
-#endif
-
 #if defined(TARGET_PPC64)
 static always_inline int64_t glue(ldsl, MEMSUFFIX) (target_ulong EA)
 {
     return (int32_t)glue(ldl, MEMSUFFIX)(EA);
 }
 
-static always_inline int64_t glue(ld32rs, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return (int32_t)((tmp & 0xFF000000) >> 24) | ((tmp & 0x00FF0000) >> 8) |
-        ((tmp & 0x0000FF00) << 8) | ((tmp & 0x000000FF) << 24);
-}
-#endif
-
-static always_inline void glue(st16r, MEMSUFFIX) (target_ulong EA,
-                                                  uint16_t data)
-{
-    uint16_t tmp = ((data & 0xFF00) >> 8) | ((data & 0x00FF) << 8);
-    glue(stw, MEMSUFFIX)(EA, tmp);
-}
-
-static always_inline void glue(st32r, MEMSUFFIX) (target_ulong EA,
-                                                  uint32_t data)
-{
-    uint32_t tmp = ((data & 0xFF000000) >> 24) | ((data & 0x00FF0000) >> 8) |
-        ((data & 0x0000FF00) << 8) | ((data & 0x000000FF) << 24);
-    glue(stl, MEMSUFFIX)(EA, tmp);
-}
-
-#if defined(TARGET_PPC64) || defined(TARGET_PPCEMB)
-static always_inline void glue(st64r, MEMSUFFIX) (target_ulong EA,
-                                                  uint64_t data)
+static always_inline int64_t glue(ldslr, MEMSUFFIX) (target_ulong EA)
 {
-    uint64_t tmp = ((data & 0xFF00000000000000ULL) >> 56) |
-        ((data & 0x00FF000000000000ULL) >> 40) |
-        ((data & 0x0000FF0000000000ULL) >> 24) |
-        ((data & 0x000000FF00000000ULL) >> 8) |
-        ((data & 0x00000000FF000000ULL) << 8) |
-        ((data & 0x0000000000FF0000ULL) << 24) |
-        ((data & 0x000000000000FF00ULL) << 40) |
-        ((data & 0x00000000000000FFULL) << 56);
-    glue(stq, MEMSUFFIX)(EA, tmp);
+    return (int32_t)glue(ldlr, MEMSUFFIX)(EA);
 }
 #endif
 
@@ -130,6 +63,7 @@ void OPPROTO glue(glue(glue(op_st, name)
 }
 #endif
 
+/* Native-endian fixed-point memory loads                                    */
 PPC_LD_OP(bz, ldub);
 PPC_LD_OP(ha, ldsw);
 PPC_LD_OP(hz, lduw);
@@ -145,20 +79,21 @@ PPC_LD_OP_64(hz, lduw);
 PPC_LD_OP_64(wz, ldl);
 #endif
 
-PPC_LD_OP(ha_le, ld16rs);
-PPC_LD_OP(hz_le, ld16r);
-PPC_LD_OP(wz_le, ld32r);
+/* Reverse-endian fixed-point memory loads                                   */
+PPC_LD_OP(ha_le, ldswr);
+PPC_LD_OP(hz_le, lduwr);
+PPC_LD_OP(wz_le, ldlr);
 #if defined(TARGET_PPC64)
-PPC_LD_OP(d_le, ld64r);
-PPC_LD_OP(wa_le, ld32rs);
-PPC_LD_OP_64(d_le, ld64r);
-PPC_LD_OP_64(wa_le, ld32rs);
-PPC_LD_OP_64(ha_le, ld16rs);
-PPC_LD_OP_64(hz_le, ld16r);
-PPC_LD_OP_64(wz_le, ld32r);
+PPC_LD_OP(d_le, ldqr);
+PPC_LD_OP(wa_le, ldslr);
+PPC_LD_OP_64(d_le, ldqr);
+PPC_LD_OP_64(wa_le, ldslr);
+PPC_LD_OP_64(ha_le, ldswr);
+PPC_LD_OP_64(hz_le, lduwr);
+PPC_LD_OP_64(wz_le, ldlr);
 #endif
 
-/***                              Integer store                            ***/
+/* Native-endian fixed-point memory stores                                   */
 PPC_ST_OP(b, stb);
 PPC_ST_OP(h, stw);
 PPC_ST_OP(w, stl);
@@ -170,27 +105,29 @@ PPC_ST_OP_64(h, stw);
 PPC_ST_OP_64(w, stl);
 #endif
 
-PPC_ST_OP(h_le, st16r);
-PPC_ST_OP(w_le, st32r);
+/* Reverse-endian fixed-point memory stores                                  */
+PPC_ST_OP(h_le, stwr);
+PPC_ST_OP(w_le, stlr);
 #if defined(TARGET_PPC64)
-PPC_ST_OP(d_le, st64r);
-PPC_ST_OP_64(d_le, st64r);
-PPC_ST_OP_64(h_le, st16r);
-PPC_ST_OP_64(w_le, st32r);
+PPC_ST_OP(d_le, stqr);
+PPC_ST_OP_64(d_le, stqr);
+PPC_ST_OP_64(h_le, stwr);
+PPC_ST_OP_64(w_le, stlr);
 #endif
 
-/***                Integer load and store with byte reverse               ***/
-PPC_LD_OP(hbr, ld16r);
-PPC_LD_OP(wbr, ld32r);
-PPC_ST_OP(hbr, st16r);
-PPC_ST_OP(wbr, st32r);
+/* Native-endian fixed-point loads and stores with byte-reverse              */
+PPC_LD_OP(hbr, lduwr);
+PPC_LD_OP(wbr, ldlr);
+PPC_ST_OP(hbr, stwr);
+PPC_ST_OP(wbr, stlr);
 #if defined(TARGET_PPC64)
-PPC_LD_OP_64(hbr, ld16r);
-PPC_LD_OP_64(wbr, ld32r);
-PPC_ST_OP_64(hbr, st16r);
-PPC_ST_OP_64(wbr, st32r);
+PPC_LD_OP_64(hbr, lduwr);
+PPC_LD_OP_64(wbr, ldlr);
+PPC_ST_OP_64(hbr, stwr);
+PPC_ST_OP_64(wbr, stlr);
 #endif
 
+/* Reverse-endian fixed-point loads and stores with byte-reverse             */
 PPC_LD_OP(hbr_le, lduw);
 PPC_LD_OP(wbr_le, ldl);
 PPC_ST_OP(hbr_le, stw);
@@ -202,88 +139,76 @@ PPC_ST_OP_64(hbr_le, stw);
 PPC_ST_OP_64(wbr_le, stl);
 #endif
 
-/***                    Integer load and store multiple                    ***/
+/* Native-endian fixed-point loads and stores multiple                       */
 void OPPROTO glue(op_lmw, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_lmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_lmw_64, MEMSUFFIX) (void)
-{
-    glue(do_lmw_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
-void OPPROTO glue(op_lmw_le, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw, MEMSUFFIX) (void)
 {
-    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_stmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_lmw_le_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_lmw_64, MEMSUFFIX) (void)
 {
-    glue(do_lmw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_lmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
-#endif
 
-void OPPROTO glue(op_stmw, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw_64, MEMSUFFIX) (void)
 {
     glue(do_stmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
+#endif
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_stmw_64, MEMSUFFIX) (void)
+/* Reverse-endian fixed-point loads and stores multiple                      */
+void OPPROTO glue(op_lmw_le, MEMSUFFIX) (void)
 {
-    glue(do_stmw_64, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
-#endif
 
 void OPPROTO glue(op_stmw_le, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_stmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_stmw_le_64, MEMSUFFIX) (void)
-{
-    glue(do_stmw_le_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
-/***                    Integer load and store strings                     ***/
-void OPPROTO glue(op_lswi, MEMSUFFIX) (void)
+void OPPROTO glue(op_lmw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw, MEMSUFFIX)(PARAM1);
+    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_lswi_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw_64, MEMSUFFIX)(PARAM1);
+    glue(do_stmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
 
-void OPPROTO glue(op_lswi_le, MEMSUFFIX) (void)
+/* Native-endian loads and stores string                                     */
+void OPPROTO glue(op_lswi, MEMSUFFIX) (void)
 {
-    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    glue(do_lsw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_lswi_le_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_lswi_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw_le_64, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_lsw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
@@ -303,6 +228,7 @@ void OPPROTO glue(op_lswx, MEMSUFFIX) (v
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
+            T0 = (uint32_t)T0;
             glue(do_lsw, MEMSUFFIX)(PARAM1);
         }
     }
@@ -320,13 +246,44 @@ void OPPROTO glue(op_lswx_64, MEMSUFFIX)
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
-            glue(do_lsw_64, MEMSUFFIX)(PARAM1);
+            glue(do_lsw, MEMSUFFIX)(PARAM1);
         }
     }
     RETURN();
 }
 #endif
 
+void OPPROTO glue(op_stsw, MEMSUFFIX) (void)
+{
+    T0 = (uint32_t)T0;
+    glue(do_stsw, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+
+#if defined(TARGET_PPC64)
+void OPPROTO glue(op_stsw_64, MEMSUFFIX) (void)
+{
+    glue(do_stsw, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+#endif
+
+/* Reverse-endian loads and stores string                                    */
+void OPPROTO glue(op_lswi_le, MEMSUFFIX) (void)
+{
+    T0 = (uint32_t)T0;
+    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+
+#if defined(TARGET_PPC64)
+void OPPROTO glue(op_lswi_le_64, MEMSUFFIX) (void)
+{
+    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+#endif
+
 void OPPROTO glue(op_lswx_le, MEMSUFFIX) (void)
 {
     /* Note: T1 comes from xer_bc then no cast is needed */
@@ -337,6 +294,7 @@ void OPPROTO glue(op_lswx_le, MEMSUFFIX)
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
+            T0 = (uint32_t)T0;
             glue(do_lsw_le, MEMSUFFIX)(PARAM1);
         }
     }
@@ -354,29 +312,16 @@ void OPPROTO glue(op_lswx_le_64, MEMSUFF
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
-            glue(do_lsw_le_64, MEMSUFFIX)(PARAM1);
+            glue(do_lsw_le, MEMSUFFIX)(PARAM1);
         }
     }
     RETURN();
 }
 #endif
 
-void OPPROTO glue(op_stsw, MEMSUFFIX) (void)
-{
-    glue(do_stsw, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_stsw_64, MEMSUFFIX) (void)
-{
-    glue(do_stsw_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
 void OPPROTO glue(op_stsw_le, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_stsw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
@@ -384,7 +329,7 @@ void OPPROTO glue(op_stsw_le, MEMSUFFIX)
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_stsw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_stsw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_stsw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
@@ -432,38 +377,9 @@ PPC_STF_OP_64(fs, stfs);
 PPC_STF_OP_64(fiwx, stfiwx);
 #endif
 
-static always_inline void glue(stfqr, MEMSUFFIX) (target_ulong EA, double d)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = d;
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-    glue(stfq, MEMSUFFIX)(EA, u.d);
-}
-
 static always_inline void glue(stfsr, MEMSUFFIX) (target_ulong EA, double d)
 {
-    union {
-        float f;
-        uint32_t u;
-    } u;
-
-    u.f = float64_to_float32(d, &env->fp_status);
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-    glue(stfl, MEMSUFFIX)(EA, u.f);
+    glue(stflr, MEMSUFFIX)(EA, float64_to_float32(d, &env->fp_status));
 }
 
 static always_inline void glue(stfiwxr, MEMSUFFIX) (target_ulong EA, double d)
@@ -475,11 +391,7 @@ static always_inline void glue(stfiwxr, 
 
     /* Store the low order 32 bits without any conversion */
     u.d = d;
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-    glue(stl, MEMSUFFIX)(EA, u.u);
+    glue(stlr, MEMSUFFIX)(EA, u.u);
 }
 
 PPC_STF_OP(fd_le, stfqr);
@@ -520,40 +432,9 @@ PPC_LDF_OP_64(fd, ldfq);
 PPC_LDF_OP_64(fs, ldfs);
 #endif
 
-static always_inline double glue(ldfqr, MEMSUFFIX) (target_ulong EA)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = glue(ldfq, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-
-    return u.d;
-}
-
 static always_inline double glue(ldfsr, MEMSUFFIX) (target_ulong EA)
 {
-    union {
-        float f;
-        uint32_t u;
-    } u;
-
-    u.f = glue(ldfl, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-
-    return float32_to_float64(u.f, &env->fp_status);
+    return float32_to_float64(glue(ldflr, MEMSUFFIX)(EA), &env->fp_status);
 }
 
 PPC_LDF_OP(fd_le, ldfqr);
@@ -615,7 +496,7 @@ void OPPROTO glue(op_lwarx_le, MEMSUFFIX
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldlr, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -627,7 +508,7 @@ void OPPROTO glue(op_lwarx_le_64, MEMSUF
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldlr, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -638,7 +519,7 @@ void OPPROTO glue(op_ldarx_le, MEMSUFFIX
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld64r, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldqr, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -649,7 +530,7 @@ void OPPROTO glue(op_ldarx_le_64, MEMSUF
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld64r, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldqr, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -731,7 +612,7 @@ void OPPROTO glue(op_stwcx_le, MEMSUFFIX
         if (unlikely(env->reserve != (uint32_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st32r, MEMSUFFIX)((uint32_t)T0, T1);
+            glue(stlr, MEMSUFFIX)((uint32_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -748,7 +629,7 @@ void OPPROTO glue(op_stwcx_le_64, MEMSUF
         if (unlikely(env->reserve != (uint64_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st32r, MEMSUFFIX)((uint64_t)T0, T1);
+            glue(stlr, MEMSUFFIX)((uint64_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -764,7 +645,7 @@ void OPPROTO glue(op_stdcx_le, MEMSUFFIX
         if (unlikely(env->reserve != (uint32_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st64r, MEMSUFFIX)((uint32_t)T0, T1);
+            glue(stqr, MEMSUFFIX)((uint32_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -780,7 +661,7 @@ void OPPROTO glue(op_stdcx_le_64, MEMSUF
         if (unlikely(env->reserve != (uint64_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st64r, MEMSUFFIX)((uint64_t)T0, T1);
+            glue(stqr, MEMSUFFIX)((uint64_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -862,6 +743,7 @@ void OPPROTO glue(op_dcbz_l128, MEMSUFFI
 
 void OPPROTO glue(op_dcbz, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_dcbz, MEMSUFFIX)();
     RETURN();
 }
@@ -940,7 +822,7 @@ void OPPROTO glue(op_dcbz_l128_64, MEMSU
 
 void OPPROTO glue(op_dcbz_64, MEMSUFFIX) (void)
 {
-    glue(do_dcbz_64, MEMSUFFIX)();
+    glue(do_dcbz, MEMSUFFIX)();
     RETURN();
 }
 #endif
@@ -948,6 +830,7 @@ void OPPROTO glue(op_dcbz_64, MEMSUFFIX)
 /* Instruction cache block invalidate */
 void OPPROTO glue(op_icbi, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_icbi, MEMSUFFIX)();
     RETURN();
 }
@@ -955,7 +838,7 @@ void OPPROTO glue(op_icbi, MEMSUFFIX) (v
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_icbi_64, MEMSUFFIX) (void)
 {
-    glue(do_icbi_64, MEMSUFFIX)();
+    glue(do_icbi, MEMSUFFIX)();
     RETURN();
 }
 #endif
@@ -991,28 +874,28 @@ void OPPROTO glue(op_ecowx_64, MEMSUFFIX
 
 void OPPROTO glue(op_eciwx_le, MEMSUFFIX) (void)
 {
-    T1 = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
+    T1 = glue(ldlr, MEMSUFFIX)((uint32_t)T0);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_eciwx_le_64, MEMSUFFIX) (void)
 {
-    T1 = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+    T1 = glue(ldlr, MEMSUFFIX)((uint64_t)T0);
     RETURN();
 }
 #endif
 
 void OPPROTO glue(op_ecowx_le, MEMSUFFIX) (void)
 {
-    glue(st32r, MEMSUFFIX)((uint32_t)T0, T1);
+    glue(stlr, MEMSUFFIX)((uint32_t)T0, T1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_ecowx_le_64, MEMSUFFIX) (void)
 {
-    glue(st32r, MEMSUFFIX)((uint64_t)T0, T1);
+    glue(stlr, MEMSUFFIX)((uint64_t)T0, T1);
     RETURN();
 }
 #endif
@@ -1070,8 +953,8 @@ void OPPROTO glue(op_vr_lvx, MEMSUFFIX) 
 
 void OPPROTO glue(op_vr_lvx_le, MEMSUFFIX) (void)
 {
-    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint32_t)T0);
-    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint32_t)T0 + 8);
+    AVR0.u64[VR_DWORD1] = glue(ldqr, MEMSUFFIX)((uint32_t)T0);
+    AVR0.u64[VR_DWORD0] = glue(ldqr, MEMSUFFIX)((uint32_t)T0 + 8);
 }
 
 void OPPROTO glue(op_vr_stvx, MEMSUFFIX) (void)
@@ -1082,8 +965,8 @@ void OPPROTO glue(op_vr_stvx, MEMSUFFIX)
 
 void OPPROTO glue(op_vr_stvx_le, MEMSUFFIX) (void)
 {
-    glue(stq, MEMSUFFIX)((uint32_t)T0, AVR0.u64[VR_DWORD1]);
-    glue(stq, MEMSUFFIX)((uint32_t)T0 + 8, AVR0.u64[VR_DWORD0]);
+    glue(stqr, MEMSUFFIX)((uint32_t)T0, AVR0.u64[VR_DWORD1]);
+    glue(stqr, MEMSUFFIX)((uint32_t)T0 + 8, AVR0.u64[VR_DWORD0]);
 }
 
 #if defined(TARGET_PPC64)
@@ -1095,8 +978,8 @@ void OPPROTO glue(op_vr_lvx_64, MEMSUFFI
 
 void OPPROTO glue(op_vr_lvx_le_64, MEMSUFFIX) (void)
 {
-    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint64_t)T0);
-    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint64_t)T0 + 8);
+    AVR0.u64[VR_DWORD1] = glue(ldqr, MEMSUFFIX)((uint64_t)T0);
+    AVR0.u64[VR_DWORD0] = glue(ldqr, MEMSUFFIX)((uint64_t)T0 + 8);
 }
 
 void OPPROTO glue(op_vr_stvx_64, MEMSUFFIX) (void)
@@ -1107,8 +990,8 @@ void OPPROTO glue(op_vr_stvx_64, MEMSUFF
 
 void OPPROTO glue(op_vr_stvx_le_64, MEMSUFFIX) (void)
 {
-    glue(stq, MEMSUFFIX)((uint64_t)T0, AVR0.u64[VR_DWORD1]);
-    glue(stq, MEMSUFFIX)((uint64_t)T0 + 8, AVR0.u64[VR_DWORD0]);
+    glue(stqr, MEMSUFFIX)((uint64_t)T0, AVR0.u64[VR_DWORD1]);
+    glue(stqr, MEMSUFFIX)((uint64_t)T0 + 8, AVR0.u64[VR_DWORD0]);
 }
 #endif
 #undef VR_DWORD0
@@ -1163,8 +1046,8 @@ _PPC_SPE_ST_OP(name, op)
 #if !defined(TARGET_PPC64)
 PPC_SPE_LD_OP(dd, ldq);
 PPC_SPE_ST_OP(dd, stq);
-PPC_SPE_LD_OP(dd_le, ld64r);
-PPC_SPE_ST_OP(dd_le, st64r);
+PPC_SPE_LD_OP(dd_le, ldqr);
+PPC_SPE_ST_OP(dd_le, stqr);
 #endif
 static always_inline uint64_t glue(spe_ldw, MEMSUFFIX) (target_ulong EA)
 {
@@ -1184,16 +1067,16 @@ PPC_SPE_ST_OP(dw, spe_stdw);
 static always_inline uint64_t glue(spe_ldw_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld32r, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ld32r, MEMSUFFIX)(EA + 4);
+    ret = (uint64_t)glue(ldlr, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(ldlr, MEMSUFFIX)(EA + 4);
     return ret;
 }
 PPC_SPE_LD_OP(dw_le, spe_ldw_le);
 static always_inline void glue(spe_stdw_le, MEMSUFFIX) (target_ulong EA,
                                                         uint64_t data)
 {
-    glue(st32r, MEMSUFFIX)(EA, data >> 32);
-    glue(st32r, MEMSUFFIX)(EA + 4, data);
+    glue(stlr, MEMSUFFIX)(EA, data >> 32);
+    glue(stlr, MEMSUFFIX)(EA + 4, data);
 }
 PPC_SPE_ST_OP(dw_le, spe_stdw_le);
 static always_inline uint64_t glue(spe_ldh, MEMSUFFIX) (target_ulong EA)
@@ -1218,20 +1101,20 @@ PPC_SPE_ST_OP(dh, spe_stdh);
 static always_inline uint64_t glue(spe_ldh_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 48;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2) << 32;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 4) << 16;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 6);
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 48;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2) << 32;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 4) << 16;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 6);
     return ret;
 }
 PPC_SPE_LD_OP(dh_le, spe_ldh_le);
 static always_inline void glue(spe_stdh_le, MEMSUFFIX) (target_ulong EA,
                                                         uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 48);
-    glue(st16r, MEMSUFFIX)(EA + 2, data >> 32);
-    glue(st16r, MEMSUFFIX)(EA + 4, data >> 16);
-    glue(st16r, MEMSUFFIX)(EA + 6, data);
+    glue(stwr, MEMSUFFIX)(EA, data >> 48);
+    glue(stwr, MEMSUFFIX)(EA + 2, data >> 32);
+    glue(stwr, MEMSUFFIX)(EA + 4, data >> 16);
+    glue(stwr, MEMSUFFIX)(EA + 6, data);
 }
 PPC_SPE_ST_OP(dh_le, spe_stdh_le);
 static always_inline uint64_t glue(spe_lwhe, MEMSUFFIX) (target_ulong EA)
@@ -1252,16 +1135,16 @@ PPC_SPE_ST_OP(whe, spe_stwhe);
 static always_inline uint64_t glue(spe_lwhe_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 48;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2) << 16;
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 48;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2) << 16;
     return ret;
 }
 PPC_SPE_LD_OP(whe_le, spe_lwhe_le);
 static always_inline void glue(spe_stwhe_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 48);
-    glue(st16r, MEMSUFFIX)(EA + 2, data >> 16);
+    glue(stwr, MEMSUFFIX)(EA, data >> 48);
+    glue(stwr, MEMSUFFIX)(EA + 2, data >> 16);
 }
 PPC_SPE_ST_OP(whe_le, spe_stwhe_le);
 static always_inline uint64_t glue(spe_lwhou, MEMSUFFIX) (target_ulong EA)
@@ -1290,24 +1173,24 @@ PPC_SPE_ST_OP(who, spe_stwho);
 static always_inline uint64_t glue(spe_lwhou_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2);
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2);
     return ret;
 }
 PPC_SPE_LD_OP(whou_le, spe_lwhou_le);
 static always_inline uint64_t glue(spe_lwhos_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = ((uint64_t)((int32_t)glue(ld16rs, MEMSUFFIX)(EA))) << 32;
-    ret |= (uint64_t)((int32_t)glue(ld16rs, MEMSUFFIX)(EA + 2));
+    ret = ((uint64_t)((int32_t)glue(ldswr, MEMSUFFIX)(EA))) << 32;
+    ret |= (uint64_t)((int32_t)glue(ldswr, MEMSUFFIX)(EA + 2));
     return ret;
 }
 PPC_SPE_LD_OP(whos_le, spe_lwhos_le);
 static always_inline void glue(spe_stwho_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 32);
-    glue(st16r, MEMSUFFIX)(EA + 2, data);
+    glue(stwr, MEMSUFFIX)(EA, data >> 32);
+    glue(stwr, MEMSUFFIX)(EA + 2, data);
 }
 PPC_SPE_ST_OP(who_le, spe_stwho_le);
 #if !defined(TARGET_PPC64)
@@ -1320,7 +1203,7 @@ PPC_SPE_ST_OP(wwo, spe_stwwo);
 static always_inline void glue(spe_stwwo_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st32r, MEMSUFFIX)(EA, data);
+    glue(stlr, MEMSUFFIX)(EA, data);
 }
 PPC_SPE_ST_OP(wwo_le, spe_stwwo_le);
 #endif
@@ -1334,7 +1217,7 @@ PPC_SPE_LD_OP(h, spe_lh);
 static always_inline uint64_t glue(spe_lh_le, MEMSUFFIX) (target_ulong EA)
 {
     uint16_t tmp;
-    tmp = glue(ld16r, MEMSUFFIX)(EA);
+    tmp = glue(lduwr, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 48) | ((uint64_t)tmp << 16);
 }
 PPC_SPE_LD_OP(h_le, spe_lh_le);
@@ -1349,7 +1232,7 @@ static always_inline
 uint64_t glue(spe_lwwsplat_le, MEMSUFFIX) (target_ulong EA)
 {
     uint32_t tmp;
-    tmp = glue(ld32r, MEMSUFFIX)(EA);
+    tmp = glue(ldlr, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 32) | (uint64_t)tmp;
 }
 PPC_SPE_LD_OP(wwsplat_le, spe_lwwsplat_le);
@@ -1369,9 +1252,9 @@ uint64_t glue(spe_lwhsplat_le, MEMSUFFIX
 {
     uint64_t ret;
     uint16_t tmp;
-    tmp = glue(ld16r, MEMSUFFIX)(EA);
+    tmp = glue(lduwr, MEMSUFFIX)(EA);
     ret = ((uint64_t)tmp << 48) | ((uint64_t)tmp << 32);
-    tmp = glue(ld16r, MEMSUFFIX)(EA + 2);
+    tmp = glue(lduwr, MEMSUFFIX)(EA + 2);
     ret |= ((uint64_t)tmp << 16) | (uint64_t)tmp;
     return ret;
 }
Index: target-sh4/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-sh4/exec.h,v
retrieving revision 1.5
diff -u -d -d -p -r1.5 exec.h
--- target-sh4/exec.h	16 Sep 2007 21:08:05 -0000	1.5
+++ target-sh4/exec.h	12 Oct 2007 07:14:49 -0000
@@ -48,6 +48,9 @@ static inline int cpu_halted(CPUState *e
 
 #ifndef CONFIG_USER_ONLY
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 #define RETURN() __asm__ __volatile__("")
Index: target-sh4/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-sh4/op_helper.c,v
retrieving revision 1.4
diff -u -d -d -p -r1.4 op_helper.c
--- target-sh4/op_helper.c	16 Sep 2007 21:08:05 -0000	1.4
+++ target-sh4/op_helper.c	12 Oct 2007 07:14:50 -0000
@@ -30,6 +30,7 @@ void do_raise_exception(void)
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -42,6 +43,21 @@ void do_raise_exception(void)
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 void tlb_fill(target_ulong addr, int is_write, int is_user, void *retaddr)
 {
     TranslationBlock *tb;
Index: target-sparc/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/exec.h,v
retrieving revision 1.21
diff -u -d -d -p -r1.21 exec.h
--- target-sparc/exec.h	30 Sep 2007 19:38:11 -0000	1.21
+++ target-sparc/exec.h	12 Oct 2007 07:14:50 -0000
@@ -100,6 +100,9 @@ void do_rdpsr();
 /* XXX: move that to a generic header */
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 static inline void env_to_regs(void)
Index: target-sparc/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/op_helper.c,v
retrieving revision 1.41
diff -u -d -d -p -r1.41 op_helper.c
--- target-sparc/op_helper.c	1 Oct 2007 17:07:58 -0000	1.41
+++ target-sparc/op_helper.c	12 Oct 2007 07:14:50 -0000
@@ -1497,6 +1497,21 @@ static void do_unaligned_access(target_u
 #define ALIGNED_ONLY
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -1508,6 +1523,7 @@ static void do_unaligned_access(target_u
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 static void do_unaligned_access(target_ulong addr, int is_write, int is_user,
                                 void *retaddr)

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] RFC: reverse-endian softmmu memory accessors
  2007-10-13  9:56 [Qemu-devel] RFC: reverse-endian softmmu memory accessors J. Mayer
@ 2007-10-13 10:47 ` Blue Swirl
  2007-10-13 12:43   ` J. Mayer
  2007-10-13 13:02   ` Thiemo Seufer
  0 siblings, 2 replies; 20+ messages in thread
From: Blue Swirl @ 2007-10-13 10:47 UTC (permalink / raw)
  To: qemu-devel

On 10/13/07, J. Mayer <l_indien@magic.fr> wrote:
> The problem:
> some CPU architectures, namely PowerPC and maybe others, offers
> facilities to access the memory or I/O in the reverse endianness, ie
> little-endian instead of big-endian for PowerPC, or provide instruction
> to make memory accesses in the "reverse-endian". This is implemented as
> a global flag on some CPU. This case is already handled by the PowerPC
> emulation but is is far from being optimal. Some other implementations
> allow the OS to store an "reverse-endian" flag in the TLB or the segment
> descriptors, thus providing per-page or per-segment endianness control.
> This is mostly used to ease driver migration from a PC platform to
> PowerPC without taking any care of the device endianness in the driver
> code (yes, this is bad...).

Nice, this may be useful for Sparc64. It has a global CPU flag for
endianness, individual pages can be marked as reverse endian, and
finally there are instructions that access memory in reverse endian.
The end result is an XOR of all these reverses, though I don't know if
any of these features are used at all.

Other memory access functions could be merged too. Is the 32 bit load
with sign extension to 64 bits used in other architectures?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] RFC: reverse-endian softmmu memory accessors
  2007-10-13 10:47 ` Blue Swirl
@ 2007-10-13 12:43   ` J. Mayer
  2007-10-13 13:07     ` Blue Swirl
  2007-10-13 13:02   ` Thiemo Seufer
  1 sibling, 1 reply; 20+ messages in thread
From: J. Mayer @ 2007-10-13 12:43 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 2101 bytes --]

On Sat, 2007-10-13 at 13:47 +0300, Blue Swirl wrote:
> On 10/13/07, J. Mayer <l_indien@magic.fr> wrote:
> > The problem:
> > some CPU architectures, namely PowerPC and maybe others, offers
> > facilities to access the memory or I/O in the reverse endianness, ie
> > little-endian instead of big-endian for PowerPC, or provide instruction
> > to make memory accesses in the "reverse-endian". This is implemented as
> > a global flag on some CPU. This case is already handled by the PowerPC
> > emulation but is is far from being optimal. Some other implementations
> > allow the OS to store an "reverse-endian" flag in the TLB or the segment
> > descriptors, thus providing per-page or per-segment endianness control.
> > This is mostly used to ease driver migration from a PC platform to
> > PowerPC without taking any care of the device endianness in the driver
> > code (yes, this is bad...).
> 
> Nice, this may be useful for Sparc64. It has a global CPU flag for
> endianness, individual pages can be marked as reverse endian, and
> finally there are instructions that access memory in reverse endian.
> The end result is a XOR of all these reverses. Though I don't know if
> any of these features are used at all.

I realized that I/O accesses for reverse-endian pages were not correct
in the softmmu_template.h header. This new version fixes this. It also
removes duplicated code in the case of unaligned accesses in a
reverse-endian page.

> Other memory access functions could be merged too. Is the 32 bit load
> with sign extension to 64 bits used in other architectures?

It's used by PowerPC. You're right, this should be fixed too. I did not
add this fix because it has less impact than the endian stuff, at least
on PowerPC, where you have explicit load/store with reverse-endian
instructions which perform poorly with the current implementation and are
well optimized using the reverse-endian patch: the current
implementation does 2 byte-swaps when loading little-endian data from
memory on a little-endian machine instead of ... nothing.

-- 
J. Mayer <l_indien@magic.fr>
Never organized

[-- Attachment #2: softmmu_reverse_endian.diff --]
[-- Type: text/x-patch, Size: 89923 bytes --]

Index: cpu-all.h
===================================================================
RCS file: /sources/qemu/qemu/cpu-all.h,v
retrieving revision 1.76
diff -u -d -d -p -r1.76 cpu-all.h
--- cpu-all.h	23 Sep 2007 15:28:03 -0000	1.76
+++ cpu-all.h	13 Oct 2007 12:30:26 -0000
@@ -161,7 +161,7 @@ typedef union {
  *
  * endian is:
  * (empty): target cpu endianness or 8 bit access
- *   r    : reversed target cpu endianness (not implemented yet)
+ *   r    : reversed target cpu endianness
  *   be   : big endian (not implemented yet)
  *   le   : little endian (not implemented yet)
  *
@@ -557,6 +557,7 @@ static inline void stfq_be_p(void *ptr, 
 
 /* target CPU memory access functions */
 #if defined(TARGET_WORDS_BIGENDIAN)
+/* native-endian */
 #define lduw_p(p) lduw_be_p(p)
 #define ldsw_p(p) ldsw_be_p(p)
 #define ldl_p(p) ldl_be_p(p)
@@ -568,7 +569,20 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_p(p, v) stq_be_p(p, v)
 #define stfl_p(p, v) stfl_be_p(p, v)
 #define stfq_p(p, v) stfq_be_p(p, v)
+/* reverse-endian */
+#define lduwr_p(p) lduw_le_p(p)
+#define ldswr_p(p) ldsw_le_p(p)
+#define ldlr_p(p) ldl_le_p(p)
+#define ldqr_p(p) ldq_le_p(p)
+#define ldflr_p(p) ldfl_le_p(p)
+#define ldfqr_p(p) ldfq_le_p(p)
+#define stwr_p(p, v) stw_le_p(p, v)
+#define stlr_p(p, v) stl_le_p(p, v)
+#define stqr_p(p, v) stq_le_p(p, v)
+#define stflr_p(p, v) stfl_le_p(p, v)
+#define stfqr_p(p, v) stfq_le_p(p, v)
 #else
+/* native-endian */
 #define lduw_p(p) lduw_le_p(p)
 #define ldsw_p(p) ldsw_le_p(p)
 #define ldl_p(p) ldl_le_p(p)
@@ -580,6 +594,18 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_p(p, v) stq_le_p(p, v)
 #define stfl_p(p, v) stfl_le_p(p, v)
 #define stfq_p(p, v) stfq_le_p(p, v)
+/* reverse-endian */
+#define lduwr_p(p) lduw_be_p(p)
+#define ldswr_p(p) ldsw_be_p(p)
+#define ldlr_p(p) ldl_be_p(p)
+#define ldqr_p(p) ldq_be_p(p)
+#define ldflr_p(p) ldfl_be_p(p)
+#define ldfqr_p(p) ldfq_be_p(p)
+#define stwr_p(p, v) stw_be_p(p, v)
+#define stlr_p(p, v) stl_be_p(p, v)
+#define stqr_p(p, v) stq_be_p(p, v)
+#define stflr_p(p, v) stfl_be_p(p, v)
+#define stfqr_p(p, v) stfq_be_p(p, v)
 #endif
 
 /* MMU memory access macros */
@@ -605,6 +631,7 @@ static inline void stfq_be_p(void *ptr, 
 #define laddr(x) (uint8_t *)(long)(x)
 #endif
 
+/* native-endian */
 #define ldub_raw(p) ldub_p(laddr((p)))
 #define ldsb_raw(p) ldsb_p(laddr((p)))
 #define lduw_raw(p) lduw_p(laddr((p)))
@@ -619,11 +646,26 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_raw(p, v) stq_p(saddr((p)), v)
 #define stfl_raw(p, v) stfl_p(saddr((p)), v)
 #define stfq_raw(p, v) stfq_p(saddr((p)), v)
-
+/* reverse endian */
+#define ldubr_raw(p) ldub_p(laddr((p)))
+#define ldsbr_raw(p) ldsb_p(laddr((p)))
+#define lduwr_raw(p) lduwr_p(laddr((p)))
+#define ldswr_raw(p) ldswr_p(laddr((p)))
+#define ldlr_raw(p) ldlr_p(laddr((p)))
+#define ldqr_raw(p) ldqr_p(laddr((p)))
+#define ldflr_raw(p) ldflr_p(laddr((p)))
+#define ldfqr_raw(p) ldfqr_p(laddr((p)))
+#define stbr_raw(p, v) stb_p(saddr((p)), v)
+#define stwr_raw(p, v) stwr_p(saddr((p)), v)
+#define stlr_raw(p, v) stlr_p(saddr((p)), v)
+#define stqr_raw(p, v) stqr_p(saddr((p)), v)
+#define stflr_raw(p, v) stflr_p(saddr((p)), v)
+#define stfqr_raw(p, v) stfqr_p(saddr((p)), v)
 
 #if defined(CONFIG_USER_ONLY)
 
 /* if user mode, no other memory access functions */
+/* native-endian */
 #define ldub(p) ldub_raw(p)
 #define ldsb(p) ldsb_raw(p)
 #define lduw(p) lduw_raw(p)
@@ -638,14 +680,38 @@ static inline void stfq_be_p(void *ptr, 
 #define stq(p, v) stq_raw(p, v)
 #define stfl(p, v) stfl_raw(p, v)
 #define stfq(p, v) stfq_raw(p, v)
+/* reverse-endian */
+#define ldubr(p) ldub_raw(p)
+#define ldsbr(p) ldsb_raw(p)
+#define lduwr(p) lduwr_raw(p)
+#define ldswr(p) ldswr_raw(p)
+#define ldlr(p) ldlr_raw(p)
+#define ldqr(p) ldqr_raw(p)
+#define ldflr(p) ldflr_raw(p)
+#define ldfqr(p) ldfqr_raw(p)
+#define stbr(p, v) stb_raw(p, v)
+#define stwr(p, v) stwr_raw(p, v)
+#define stlr(p, v) stlr_raw(p, v)
+#define stqr(p, v) stqr_raw(p, v)
+#define stflr(p, v) stflr_raw(p, v)
+#define stfqr(p, v) stfqr_raw(p, v)
 
+/* native-endian */
 #define ldub_code(p) ldub_raw(p)
 #define ldsb_code(p) ldsb_raw(p)
 #define lduw_code(p) lduw_raw(p)
 #define ldsw_code(p) ldsw_raw(p)
 #define ldl_code(p) ldl_raw(p)
 #define ldq_code(p) ldq_raw(p)
+/* reverse-endian */
+#define ldubr_code(p) ldub_raw(p)
+#define ldsbr_code(p) ldsb_raw(p)
+#define lduwr_code(p) lduwr_raw(p)
+#define ldswr_code(p) ldswr_raw(p)
+#define ldlr_code(p) ldlr_raw(p)
+#define ldqr_code(p) ldqr_raw(p)
 
+/* native-endian */
 #define ldub_kernel(p) ldub_raw(p)
 #define ldsb_kernel(p) ldsb_raw(p)
 #define lduw_kernel(p) lduw_raw(p)
@@ -660,6 +726,21 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_kernel(p, v) stq_raw(p, v)
 #define stfl_kernel(p, v) stfl_raw(p, v)
 #define stfq_kernel(p, vt) stfq_raw(p, v)
+/* reverse-endian */
+#define ldubr_kernel(p) ldub_raw(p)
+#define ldsbr_kernel(p) ldsb_raw(p)
+#define lduwr_kernel(p) lduwr_raw(p)
+#define ldswr_kernel(p) ldswr_raw(p)
+#define ldlr_kernel(p) ldlr_raw(p)
+#define ldqr_kernel(p) ldqr_raw(p)
+#define ldflr_kernel(p) ldflr_raw(p)
+#define ldfqr_kernel(p) ldfqr_raw(p)
+#define stbr_kernel(p, v) stbr_raw(p, v)
+#define stwr_kernel(p, v) stwr_raw(p, v)
+#define stlr_kernel(p, v) stlr_raw(p, v)
+#define stqr_kernel(p, v) stqr_raw(p, v)
+#define stflr_kernel(p, v) stflr_raw(p, v)
+#define stfqr_kernel(p, v) stfqr_raw(p, v)
 
 #endif /* defined(CONFIG_USER_ONLY) */
 
@@ -790,6 +871,8 @@ extern uint8_t *phys_ram_dirty;
    the physical address */
 #define IO_MEM_ROMD        (1)
 #define IO_MEM_SUBPAGE     (2)
+/* Flag bit (must not overlap ROMD/SUBPAGE): on some target CPUs, endianness is stored in page tables */
+#define IO_MEM_REVERSE     (4)
 
 typedef void CPUWriteMemoryFunc(void *opaque, target_phys_addr_t addr, uint32_t value);
 typedef uint32_t CPUReadMemoryFunc(void *opaque, target_phys_addr_t addr);
Index: exec-all.h
===================================================================
RCS file: /sources/qemu/qemu/exec-all.h,v
retrieving revision 1.67
diff -u -d -d -p -r1.67 exec-all.h
--- exec-all.h	8 Oct 2007 13:16:14 -0000	1.67
+++ exec-all.h	13 Oct 2007 12:30:26 -0000
@@ -569,6 +569,21 @@ void tlb_fill(target_ulong addr, int is_
 #define MEMSUFFIX _code
 #define env cpu_single_env
 
+/* native-endian */
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+
+/* reverse-endian */
+#define REVERSE_ENDIAN
 #define DATA_SIZE 1
 #include "softmmu_header.h"
 
@@ -580,6 +595,7 @@ void tlb_fill(target_ulong addr, int is_
 
 #define DATA_SIZE 8
 #include "softmmu_header.h"
+#undef REVERSE_ENDIAN
 
 #undef ACCESS_TYPE
 #undef MEMSUFFIX
Index: exec.c
===================================================================
RCS file: /sources/qemu/qemu/exec.c,v
retrieving revision 1.108
diff -u -d -d -p -r1.108 exec.c
--- exec.c	8 Oct 2007 13:16:14 -0000	1.108
+++ exec.c	13 Oct 2007 12:30:27 -0000
@@ -2507,7 +2507,7 @@ void cpu_physical_memory_rw(target_phys_
     uint8_t *ptr;
     uint32_t val;
     target_phys_addr_t page;
-    unsigned long pd;
+    unsigned long pd, addr1;
     PhysPageDesc *p;
 
     while (len > 0) {
@@ -2524,31 +2524,54 @@ void cpu_physical_memory_rw(target_phys_
 
         if (is_write) {
             if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
-                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
-                /* XXX: could force cpu_single_env to NULL to avoid
-                   potential bugs */
-                if (l >= 4 && ((addr & 3) == 0)) {
-                    /* 32 bit write access */
-                    val = ldl_p(buf);
-                    io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
-                    l = 4;
-                } else if (l >= 2 && ((addr & 1) == 0)) {
-                    /* 16 bit write access */
-                    val = lduw_p(buf);
-                    io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
-                    l = 2;
+                if (pd & IO_MEM_REVERSE) {
+                    /* Specific case for reverse endian page write */
+                    addr1 = (pd & TARGET_PAGE_MASK) +
+                        (addr & ~TARGET_PAGE_MASK);
+                    ptr = phys_ram_base + addr1;
+                    for (; l >= 4; l -= 4) {
+                        stlr_p(ptr, *(uint32_t *)buf);
+                        ptr += 4;
+                        buf += 4;
+                    }
+                    for (; l >= 2; l -= 2) {
+                        stwr_p(ptr, *(uint16_t *)buf);
+                        ptr += 2;
+                        buf += 2;
+                    }
+                    if (l >= 1)
+                        *ptr = *buf;
+                    goto invalidate_code;
                 } else {
-                    /* 8 bit write access */
-                    val = ldub_p(buf);
-                    io_mem_write[io_index][0](io_mem_opaque[io_index], addr, val);
-                    l = 1;
+                    io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+                    /* XXX: could force cpu_single_env to NULL to avoid
+                       potential bugs */
+                    if (l >= 4 && ((addr & 3) == 0)) {
+                        /* 32 bit write access */
+                        val = ldl_p(buf);
+                        io_mem_write[io_index][2](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 4;
+                    } else if (l >= 2 && ((addr & 1) == 0)) {
+                        /* 16 bit write access */
+                        val = lduw_p(buf);
+                        io_mem_write[io_index][1](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 2;
+                    } else {
+                        /* 8 bit write access */
+                        val = ldub_p(buf);
+                        io_mem_write[io_index][0](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 1;
+                    }
                 }
             } else {
-                unsigned long addr1;
                 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
                 /* RAM case */
                 ptr = phys_ram_base + addr1;
                 memcpy(ptr, buf, l);
+            invalidate_code:
                 if (!cpu_physical_memory_is_dirty(addr1)) {
                     /* invalidate code */
                     tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
@@ -2560,23 +2583,45 @@ void cpu_physical_memory_rw(target_phys_
         } else {
             if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
                 !(pd & IO_MEM_ROMD)) {
-                /* I/O case */
-                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
-                if (l >= 4 && ((addr & 3) == 0)) {
-                    /* 32 bit read access */
-                    val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
-                    stl_p(buf, val);
-                    l = 4;
-                } else if (l >= 2 && ((addr & 1) == 0)) {
-                    /* 16 bit read access */
-                    val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
-                    stw_p(buf, val);
-                    l = 2;
+                if (pd & IO_MEM_REVERSE) {
+                    /* Specific case for reverse endian page read */
+                    addr1 = (pd & TARGET_PAGE_MASK) +
+                        (addr & ~TARGET_PAGE_MASK);
+                    ptr = phys_ram_base + addr1;
+                    for (; l >= 4; l -= 4) {
+                        *(uint32_t *)buf = ldlr_p(ptr);
+                        ptr += 4;
+                        buf += 4;
+                    }
+                    for (; l >= 2; l -= 2) {
+                        *(uint16_t *)buf = lduwr_p(ptr);
+                        ptr += 2;
+                        buf += 2;
+                    }
+                    if (l >= 1)
+                        *buf = *ptr;
                 } else {
-                    /* 8 bit read access */
-                    val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr);
-                    stb_p(buf, val);
-                    l = 1;
+                    /* I/O case */
+                    io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+                    if (l >= 4 && ((addr & 3) == 0)) {
+                        /* 32 bit read access */
+                        val = io_mem_read[io_index][2](io_mem_opaque[io_index],
+                                                       addr);
+                        stl_p(buf, val);
+                        l = 4;
+                    } else if (l >= 2 && ((addr & 1) == 0)) {
+                        /* 16 bit read access */
+                        val = io_mem_read[io_index][1](io_mem_opaque[io_index],
+                                                       addr);
+                        stw_p(buf, val);
+                        l = 2;
+                    } else {
+                        /* 8 bit read access */
+                        val = io_mem_read[io_index][0](io_mem_opaque[io_index],
+                                                       addr);
+                        stb_p(buf, val);
+                        l = 1;
+                    }
                 }
             } else {
                 /* RAM case */
@@ -2907,6 +2952,21 @@ void dump_exec_info(FILE *f,
 #define env cpu_single_env
 #define SOFTMMU_CODE_ACCESS
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -2918,6 +2978,7 @@ void dump_exec_info(FILE *f,
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 #undef env
 
Index: softmmu_header.h
===================================================================
RCS file: /sources/qemu/qemu/softmmu_header.h,v
retrieving revision 1.17
diff -u -d -d -p -r1.17 softmmu_header.h
--- softmmu_header.h	8 Oct 2007 13:16:14 -0000	1.17
+++ softmmu_header.h	13 Oct 2007 12:30:27 -0000
@@ -17,6 +17,9 @@
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
+
+#if !defined(REVERSE_ENDIAN)
+/* native-endian */
 #if DATA_SIZE == 8
 #define SUFFIX q
 #define USUFFIX q
@@ -38,6 +41,30 @@
 #else
 #error unsupported data size
 #endif
+#else /* !defined(REVERSE_ENDIAN) */
+/* reverse-endian */
+#if DATA_SIZE == 8
+#define SUFFIX qr
+#define USUFFIX qr
+#define DATA_TYPE uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX lr
+#define USUFFIX lr
+#define DATA_TYPE uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX wr
+#define USUFFIX uwr
+#define DATA_TYPE uint16_t
+#define DATA_STYPE int16_t
+#elif DATA_SIZE == 1
+#define SUFFIX br
+#define USUFFIX ubr
+#define DATA_TYPE uint8_t
+#define DATA_STYPE int8_t
+#else
+#error unsupported data size
+#endif
+#endif /* defined(REVERSE_ENDIAN) */
 
 #if ACCESS_TYPE == 0
 
@@ -342,6 +369,7 @@ static inline void glue(glue(st, SUFFIX)
 
 #if ACCESS_TYPE != 3
 
+#if !defined(REVERSE_ENDIAN)
 #if DATA_SIZE == 8
 static inline float64 glue(ldfq, MEMSUFFIX)(target_ulong ptr)
 {
@@ -386,6 +414,54 @@ static inline void glue(stfl, MEMSUFFIX)
 }
 #endif /* DATA_SIZE == 4 */
 
+#else /* defined(REVERSE_ENDIAN) */
+
+#if DATA_SIZE == 8
+static inline float64 glue(ldfqr, MEMSUFFIX)(target_ulong ptr)
+{
+    union {
+        float64 d;
+        uint64_t i;
+    } u;
+    u.i = glue(ldqr, MEMSUFFIX)(ptr);
+    return u.d;
+}
+
+static inline void glue(stfqr, MEMSUFFIX)(target_ulong ptr, float64 v)
+{
+    union {
+        float64 d;
+        uint64_t i;
+    } u;
+    u.d = v;
+    glue(stqr, MEMSUFFIX)(ptr, u.i);
+}
+#endif /* DATA_SIZE == 8 */
+
+#if DATA_SIZE == 4
+static inline float32 glue(ldflr, MEMSUFFIX)(target_ulong ptr)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.i = glue(ldlr, MEMSUFFIX)(ptr);
+    return u.f;
+}
+
+static inline void glue(stflr, MEMSUFFIX)(target_ulong ptr, float32 v)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.f = v;
+    glue(stlr, MEMSUFFIX)(ptr, u.i);
+}
+#endif /* DATA_SIZE == 4 */
+
+#endif /* defined(REVERSE_ENDIAN) */
+
 #endif /* ACCESS_TYPE != 3 */
 
 #undef RES_TYPE
Index: softmmu_template.h
===================================================================
RCS file: /sources/qemu/qemu/softmmu_template.h,v
retrieving revision 1.18
diff -u -d -d -p -r1.18 softmmu_template.h
--- softmmu_template.h	17 Sep 2007 08:09:45 -0000	1.18
+++ softmmu_template.h	13 Oct 2007 12:30:27 -0000
@@ -19,25 +19,66 @@
  */
 #define DATA_SIZE (1 << SHIFT)
 
+#if !defined(REVERSE_ENDIAN)
+/* native-endian */
 #if DATA_SIZE == 8
 #define SUFFIX q
 #define USUFFIX q
+#define RSUFFIX qr
+#define URSUFFIX qr
 #define DATA_TYPE uint64_t
 #elif DATA_SIZE == 4
 #define SUFFIX l
 #define USUFFIX l
+#define RSUFFIX lr
+#define URSUFFIX lr
 #define DATA_TYPE uint32_t
 #elif DATA_SIZE == 2
 #define SUFFIX w
 #define USUFFIX uw
+#define RSUFFIX wr
+#define URSUFFIX uwr
 #define DATA_TYPE uint16_t
 #elif DATA_SIZE == 1
 #define SUFFIX b
 #define USUFFIX ub
+#define RSUFFIX br
+#define URSUFFIX ubr
+#define DATA_TYPE uint8_t
+#else
+#error unsupported data size
+#endif
+#else /* !defined(REVERSE_ENDIAN) */
+/* reverse-endian */
+#if DATA_SIZE == 8
+#define SUFFIX qr
+#define USUFFIX qr
+#define RSUFFIX q
+#define URSUFFIX q
+#define DATA_TYPE uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX lr
+#define USUFFIX lr
+#define RSUFFIX l
+#define URSUFFIX l
+#define DATA_TYPE uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX wr
+#define USUFFIX uwr
+#define RSUFFIX w
+#define URSUFFIX uw
+#define DATA_TYPE uint16_t
+#elif DATA_SIZE == 1
+#define SUFFIX br
+#define USUFFIX ubr
+#define RSUFFIX b
+#define URSUFFIX ub
 #define DATA_TYPE uint8_t
 #else
 #error unsupported data size
 #endif
+#endif /* defined(REVERSE_ENDIAN) */
+
 
 #ifdef SOFTMMU_CODE_ACCESS
 #define READ_ACCESS_TYPE 2
@@ -47,25 +88,62 @@
 #define ADDR_READ addr_read
 #endif
 
+#if (defined(TARGET_WORDS_BIGENDIAN) && !defined(REVERSE_ENDIAN)) || \
+    (!defined(TARGET_WORDS_BIGENDIAN) && defined(REVERSE_ENDIAN))
+#define ACCESS_WORDS_BIGENDIAN
+#endif
+
+/* Beware: we do not have reverse-endian accessors for IOs */
+#if defined(REVERSE_ENDIAN)
+#define DO_IOSWAP 1
+#else
+#define DO_IOSWAP 0
+#endif
+#if SHIFT == 1
+#define IOSWAP(val) bswap16(val)
+#elif SHIFT >= 2
+#define IOSWAP(val) bswap32(val)
+#else
+#define IOSWAP(val) (val)
+#endif
+
 static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
                                                         int is_user,
                                                         void *retaddr);
 static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
-                                              target_ulong tlb_addr)
+                                              target_ulong tlb_addr,
+                                              int do_ioswap)
 {
     DATA_TYPE res;
+#if SHIFT > 2
+    uint32_t tmp;
+#endif
     int index;
 
     index = (tlb_addr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
 #if SHIFT <= 2
     res = io_mem_read[index][SHIFT](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        res = IOSWAP(res);
 #else
-#ifdef TARGET_WORDS_BIGENDIAN
-    res = (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr) << 32;
-    res |= io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+#ifdef ACCESS_WORDS_BIGENDIAN
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res = (uint64_t)tmp << 32;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res |= tmp;
 #else
-    res = io_mem_read[index][2](io_mem_opaque[index], physaddr);
-    res |= (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr + 4) << 32;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res = tmp;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res |= (uint64_t)tmp << 32;
 #endif
 #endif /* SHIFT > 2 */
 #ifdef USE_KQEMU
@@ -92,10 +170,34 @@ DATA_TYPE REGPARM(1) glue(glue(__ld, SUF
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[is_user][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO read */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 1);
+                } else {
+                    /* Specific case for reverse endian page read */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages or IO) */
+                        goto do_unaligned_access;
+                    }
+#ifdef ALIGNED_ONLY
+                    if ((addr & (DATA_SIZE - 1)) != 0) {
+                        retaddr = GETPC();
+                        do_unaligned_access(addr, READ_ACCESS_TYPE,
+                                            is_user, retaddr);
+                    }
+#endif
+                    res = glue(glue(ld, URSUFFIX), _raw)((uint8_t *)(long)physaddr);
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
             /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
@@ -144,10 +246,45 @@ static DATA_TYPE glue(glue(slow_ld, SUFF
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[is_user][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO read */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 1);
+                } else {
+                    /* Specific case for reverse endian page read */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages) */
+                        addr1 = addr & ~(DATA_SIZE - 1);
+                        addr2 = addr1 + DATA_SIZE;
+                        res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr1,
+                                                                      is_user,
+                                                                      retaddr);
+                        res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
+                                                                      is_user,
+                                                                      retaddr);
+                        shift = (addr & (DATA_SIZE - 1)) * 8;
+#ifdef ACCESS_WORDS_BIGENDIAN
+                        res = (res1 >> shift) |
+                            (res2 << ((DATA_SIZE * 8) - shift));
+#else
+                        res = (res1 << shift) |
+                            (res2 >> ((DATA_SIZE * 8) - shift));
+#endif
+                        res = (DATA_TYPE)res;
+                    } else {
+                        /* unaligned/aligned access in the same page */
+                        res = glue(glue(ld, URSUFFIX), _raw)((uint8_t *)(long)physaddr);
+                    }
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* slow unaligned access (it spans two pages) */
@@ -158,7 +295,7 @@ static DATA_TYPE glue(glue(slow_ld, SUFF
             res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
                                                           is_user, retaddr);
             shift = (addr & (DATA_SIZE - 1)) * 8;
-#ifdef TARGET_WORDS_BIGENDIAN
+#ifdef ACCESS_WORDS_BIGENDIAN
             res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
 #else
             res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift));
@@ -186,22 +323,39 @@ static void glue(glue(slow_st, SUFFIX), 
 static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr,
                                           DATA_TYPE val,
                                           target_ulong tlb_addr,
-                                          void *retaddr)
+                                          void *retaddr, int do_ioswap)
 {
+#if SHIFT > 2
+    uint32_t tmp;
+#endif
     int index;
 
     index = (tlb_addr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
     env->mem_write_vaddr = tlb_addr;
     env->mem_write_pc = (unsigned long)retaddr;
 #if SHIFT <= 2
+    if (do_ioswap != DO_IOSWAP)
+        val = IOSWAP(val);
     io_mem_write[index][SHIFT](io_mem_opaque[index], physaddr, val);
 #else
-#ifdef TARGET_WORDS_BIGENDIAN
-    io_mem_write[index][2](io_mem_opaque[index], physaddr, val >> 32);
-    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val);
+#ifdef ACCESS_WORDS_BIGENDIAN
+    tmp = val >> 32;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr, tmp);
+    tmp = val;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, tmp);
 #else
-    io_mem_write[index][2](io_mem_opaque[index], physaddr, val);
-    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val >> 32);
+    tmp = val;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr, tmp);
+    tmp = val >> 32;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, tmp);
 #endif
 #endif /* SHIFT > 2 */
 #ifdef USE_KQEMU
@@ -224,12 +378,37 @@ void REGPARM(2) glue(glue(__st, SUFFIX),
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[is_user][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            retaddr = GETPC();
-            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO write */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    retaddr = GETPC();
+                    glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr,
+                                           1);
+                } else {
+                    /* Specific case for reverse endian page write */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        goto do_unaligned_access;
+                    }
+#ifdef ALIGNED_ONLY
+                    if ((addr & (DATA_SIZE - 1)) != 0) {
+                        retaddr = GETPC();
+                        do_unaligned_access(addr, 1, is_user, retaddr);
+                    }
+#endif
+                    glue(glue(st, RSUFFIX), _raw)((uint8_t *)(long)physaddr, val);
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                retaddr = GETPC();
+                glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
+            /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
@@ -275,15 +454,48 @@ static void glue(glue(slow_st, SUFFIX), 
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[is_user][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO write */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr,
+                                           1);
+                } else {
+                    /* Specific case for reverse endian page write */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages or IO) */
+                        /* XXX: not efficient, but simple */
+                        for(i = 0;i < DATA_SIZE; i++) {
+#ifdef ACCESS_WORDS_BIGENDIAN
+                            glue(slow_stb, MMUSUFFIX)(addr + i, val >> (i * 8),
+                                                      is_user, retaddr);
+#else
+                            glue(slow_stb, MMUSUFFIX)(addr + i,
+                                                      val >> (((DATA_SIZE - 1) * 8)
+                                                              - (i * 8)),
+                                                      is_user, retaddr);
+#endif
+                        }
+
+                    } else {
+                        /* aligned/unaligned access in the same page */
+                        glue(glue(st, RSUFFIX), _raw)((uint8_t *)(long)physaddr,
+                                                      val);
+                    }
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* XXX: not efficient, but simple */
             for(i = 0;i < DATA_SIZE; i++) {
-#ifdef TARGET_WORDS_BIGENDIAN
+#ifdef ACCESS_WORDS_BIGENDIAN
                 glue(slow_stb, MMUSUFFIX)(addr + i, val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
                                           is_user, retaddr);
 #else
@@ -304,10 +516,15 @@ static void glue(glue(slow_st, SUFFIX), 
 
 #endif /* !defined(SOFTMMU_CODE_ACCESS) */
 
+#undef DO_IOSWAP
+#undef IOSWAP
+#undef ACCESS_WORDS_BIGENDIAN
 #undef READ_ACCESS_TYPE
 #undef SHIFT
 #undef DATA_TYPE
 #undef SUFFIX
 #undef USUFFIX
+#undef RSUFFIX
+#undef URSUFFIX
 #undef DATA_SIZE
 #undef ADDR_READ
Index: target-alpha/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/exec.h,v
retrieving revision 1.3
diff -u -d -d -p -r1.3 exec.h
--- target-alpha/exec.h	16 Sep 2007 21:08:01 -0000	1.3
+++ target-alpha/exec.h	13 Oct 2007 12:30:27 -0000
@@ -62,6 +62,9 @@ register uint64_t T2 asm(AREG3);
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 static inline void env_to_regs(void)
Index: target-alpha/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/op_helper.c,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 op_helper.c
--- target-alpha/op_helper.c	16 Sep 2007 21:08:01 -0000	1.2
+++ target-alpha/op_helper.c	13 Oct 2007 12:30:28 -0000
@@ -1207,6 +1207,21 @@ void helper_st_phys_to_virt (void)
 
 #define MMUSUFFIX _mmu
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -1218,6 +1233,7 @@ void helper_st_phys_to_virt (void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-arm/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-arm/exec.h,v
retrieving revision 1.13
diff -u -d -d -p -r1.13 exec.h
--- target-arm/exec.h	16 Sep 2007 21:08:01 -0000	1.13
+++ target-arm/exec.h	13 Oct 2007 12:30:28 -0000
@@ -64,6 +64,9 @@ static inline int cpu_halted(CPUState *e
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 /* In op_helper.c */
Index: target-arm/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-arm/op_helper.c,v
retrieving revision 1.6
diff -u -d -d -p -r1.6 op_helper.c
--- target-arm/op_helper.c	16 Sep 2007 21:08:02 -0000	1.6
+++ target-arm/op_helper.c	13 Oct 2007 12:30:28 -0000
@@ -180,6 +180,21 @@ void do_vfp_get_fpscr(void)
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -191,6 +206,7 @@ void do_vfp_get_fpscr(void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-cris/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-cris/exec.h,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 exec.h
--- target-cris/exec.h	8 Oct 2007 13:04:02 -0000	1.1
+++ target-cris/exec.h	13 Oct 2007 12:30:28 -0000
@@ -50,6 +50,9 @@ void tlb_fill (target_ulong addr, int is
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 void cpu_cris_flush_flags(CPUCRISState *env, int cc_op);
Index: target-cris/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-cris/op_helper.c,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 op_helper.c
--- target-cris/op_helper.c	8 Oct 2007 13:04:02 -0000	1.1
+++ target-cris/op_helper.c	13 Oct 2007 12:30:28 -0000
@@ -25,6 +25,21 @@
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -36,6 +51,7 @@
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-i386/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-i386/exec.h,v
retrieving revision 1.37
diff -u -d -d -p -r1.37 exec.h
--- target-i386/exec.h	23 Sep 2007 15:28:04 -0000	1.37
+++ target-i386/exec.h	13 Oct 2007 12:30:28 -0000
@@ -217,6 +217,9 @@ void check_iol_DX(void);
 #if !defined(CONFIG_USER_ONLY)
 
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 
 static inline double ldfq(target_ulong ptr)
 {
Index: target-i386/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/helper.c,v
retrieving revision 1.89
diff -u -d -d -p -r1.89 helper.c
--- target-i386/helper.c	27 Sep 2007 01:52:00 -0000	1.89
+++ target-i386/helper.c	13 Oct 2007 12:30:28 -0000
@@ -3867,6 +3867,21 @@ void update_fp_status(void)
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -3878,6 +3893,7 @@ void update_fp_status(void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 #endif
 
Index: target-m68k/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-m68k/exec.h,v
retrieving revision 1.4
diff -u -d -d -p -r1.4 exec.h
--- target-m68k/exec.h	16 Sep 2007 21:08:03 -0000	1.4
+++ target-m68k/exec.h	13 Oct 2007 12:30:28 -0000
@@ -42,6 +42,9 @@ int cpu_m68k_handle_mmu_fault (CPUState 
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 void cpu_m68k_flush_flags(CPUM68KState *env, int cc_op);
Index: target-m68k/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-m68k/op_helper.c,v
retrieving revision 1.6
diff -u -d -d -p -r1.6 op_helper.c
--- target-m68k/op_helper.c	16 Sep 2007 21:08:03 -0000	1.6
+++ target-m68k/op_helper.c	13 Oct 2007 12:30:28 -0000
@@ -33,6 +33,21 @@ extern int semihosting_enabled;
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -44,6 +59,7 @@ extern int semihosting_enabled;
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-mips/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-mips/exec.h,v
retrieving revision 1.38
diff -u -d -d -p -r1.38 exec.h
--- target-mips/exec.h	9 Oct 2007 03:39:58 -0000	1.38
+++ target-mips/exec.h	13 Oct 2007 12:30:28 -0000
@@ -54,6 +54,9 @@ register target_ulong T2 asm(AREG3);
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 #if defined(TARGET_MIPSN32) || defined(TARGET_MIPS64)
Index: target-mips/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-mips/op_helper.c,v
retrieving revision 1.65
diff -u -d -d -p -r1.65 op_helper.c
--- target-mips/op_helper.c	9 Oct 2007 03:39:58 -0000	1.65
+++ target-mips/op_helper.c	13 Oct 2007 12:30:28 -0000
@@ -544,6 +544,21 @@ static void do_unaligned_access (target_
 #define MMUSUFFIX _mmu
 #define ALIGNED_ONLY
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -555,6 +570,7 @@ static void do_unaligned_access (target_
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 static void do_unaligned_access (target_ulong addr, int is_write, int is_user, void *retaddr)
 {
Index: target-ppc/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/exec.h,v
retrieving revision 1.28
diff -u -d -d -p -r1.28 exec.h
--- target-ppc/exec.h	7 Oct 2007 18:19:25 -0000	1.28
+++ target-ppc/exec.h	13 Oct 2007 12:30:28 -0000
@@ -91,7 +91,12 @@ static always_inline target_ulong rotl64
 #endif
 
 #if !defined(CONFIG_USER_ONLY)
+
+#include "softmmu_exec.h"
+#define REVERSE_ENDIAN
 #include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
+
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 void do_raise_exception_err (uint32_t exception, int error_code);
Index: target-ppc/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_helper.c,v
retrieving revision 1.49
diff -u -d -d -p -r1.49 op_helper.c
--- target-ppc/op_helper.c	7 Oct 2007 17:13:43 -0000	1.49
+++ target-ppc/op_helper.c	13 Oct 2007 12:30:28 -0000
@@ -2291,6 +2291,21 @@ DO_SPE_OP1(fsctuf);
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -2302,6 +2317,7 @@ DO_SPE_OP1(fsctuf);
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-ppc/op_helper.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_helper.h,v
retrieving revision 1.21
diff -u -d -d -p -r1.21 op_helper.h
--- target-ppc/op_helper.h	7 Oct 2007 17:13:44 -0000	1.21
+++ target-ppc/op_helper.h	13 Oct 2007 12:30:28 -0000
@@ -37,19 +37,6 @@ void glue(do_POWER2_lfq_le, MEMSUFFIX) (
 void glue(do_POWER2_stfq, MEMSUFFIX) (void);
 void glue(do_POWER2_stfq_le, MEMSUFFIX) (void);
 
-#if defined(TARGET_PPC64)
-void glue(do_lsw_64, MEMSUFFIX) (int dst);
-void glue(do_lsw_le_64, MEMSUFFIX) (int dst);
-void glue(do_stsw_64, MEMSUFFIX) (int src);
-void glue(do_stsw_le_64, MEMSUFFIX) (int src);
-void glue(do_lmw_64, MEMSUFFIX) (int dst);
-void glue(do_lmw_le_64, MEMSUFFIX) (int dst);
-void glue(do_stmw_64, MEMSUFFIX) (int src);
-void glue(do_stmw_le_64, MEMSUFFIX) (int src);
-void glue(do_icbi_64, MEMSUFFIX) (void);
-void glue(do_dcbz_64, MEMSUFFIX) (void);
-#endif
-
 #else
 
 void do_print_mem_EA (target_ulong EA);
Index: target-ppc/op_helper_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_helper_mem.h,v
retrieving revision 1.14
diff -u -d -d -p -r1.14 op_helper_mem.h
--- target-ppc/op_helper_mem.h	7 Oct 2007 17:13:44 -0000	1.14
+++ target-ppc/op_helper_mem.h	13 Oct 2007 12:30:28 -0000
@@ -19,85 +19,33 @@
  */
 
 /* Multiple word / string load and store */
-static always_inline target_ulong glue(ld32r, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF000000UL) >> 24) | ((tmp & 0x00FF0000UL) >> 8) |
-        ((tmp & 0x0000FF00UL) << 8) | ((tmp & 0x000000FFUL) << 24);
-}
-
-static always_inline void glue(st32r, MEMSUFFIX) (target_ulong EA,
-                                                  target_ulong data)
-{
-    uint32_t tmp =
-        ((data & 0xFF000000UL) >> 24) | ((data & 0x00FF0000UL) >> 8) |
-        ((data & 0x0000FF00UL) << 8) | ((data & 0x000000FFUL) << 24);
-    glue(stl, MEMSUFFIX)(EA, tmp);
-}
-
 void glue(do_lmw, MEMSUFFIX) (int dst)
 {
     for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lmw_64, MEMSUFFIX) (int dst)
-{
-    for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst] = glue(ldl, MEMSUFFIX)(T0);
     }
 }
-#endif
 
 void glue(do_stmw, MEMSUFFIX) (int src)
 {
     for (; src < 32; src++, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint32_t)T0, env->gpr[src]);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stmw_64, MEMSUFFIX) (int src)
-{
-    for (; src < 32; src++, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint64_t)T0, env->gpr[src]);
+        glue(stl, MEMSUFFIX)(T0, env->gpr[src]);
     }
 }
-#endif
 
 void glue(do_lmw_le, MEMSUFFIX) (int dst)
 {
     for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lmw_le_64, MEMSUFFIX) (int dst)
-{
-    for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst] = glue(ldlr, MEMSUFFIX)(T0);
     }
 }
-#endif
 
 void glue(do_stmw_le, MEMSUFFIX) (int src)
 {
     for (; src < 32; src++, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint32_t)T0, env->gpr[src]);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stmw_le_64, MEMSUFFIX) (int src)
-{
-    for (; src < 32; src++, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint64_t)T0, env->gpr[src]);
+        glue(stlr, MEMSUFFIX)(T0, env->gpr[src]);
     }
 }
-#endif
 
 void glue(do_lsw, MEMSUFFIX) (int dst)
 {
@@ -105,71 +53,33 @@ void glue(do_lsw, MEMSUFFIX) (int dst)
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-        if (unlikely(dst == 32))
-            dst = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        tmp = 0;
-        for (sh = 24; T1 > 0; T1--, T0++, sh -= 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint32_t)T0) << sh;
-        }
-        env->gpr[dst] = tmp;
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lsw_64, MEMSUFFIX) (int dst)
-{
-    uint32_t tmp;
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst++] = glue(ldl, MEMSUFFIX)(T0);
         if (unlikely(dst == 32))
             dst = 0;
     }
     if (unlikely(T1 != 0)) {
         tmp = 0;
         for (sh = 24; T1 > 0; T1--, T0++, sh -= 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint64_t)T0) << sh;
+            tmp |= glue(ldub, MEMSUFFIX)(T0) << sh;
         }
         env->gpr[dst] = tmp;
     }
 }
-#endif
 
 void glue(do_stsw, MEMSUFFIX) (int src)
 {
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint32_t)T0, env->gpr[src++]);
-        if (unlikely(src == 32))
-            src = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        for (sh = 24; T1 > 0; T1--, T0++, sh -= 8)
-            glue(stb, MEMSUFFIX)((uint32_t)T0, (env->gpr[src] >> sh) & 0xFF);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stsw_64, MEMSUFFIX) (int src)
-{
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint64_t)T0, env->gpr[src++]);
+        glue(stl, MEMSUFFIX)(T0, env->gpr[src++]);
         if (unlikely(src == 32))
             src = 0;
     }
     if (unlikely(T1 != 0)) {
         for (sh = 24; T1 > 0; T1--, T0++, sh -= 8)
-            glue(stb, MEMSUFFIX)((uint64_t)T0, (env->gpr[src] >> sh) & 0xFF);
+            glue(stb, MEMSUFFIX)(T0, (env->gpr[src] >> sh) & 0xFF);
     }
 }
-#endif
 
 void glue(do_lsw_le, MEMSUFFIX) (int dst)
 {
@@ -177,71 +87,33 @@ void glue(do_lsw_le, MEMSUFFIX) (int dst
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
-        if (unlikely(dst == 32))
-            dst = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        tmp = 0;
-        for (sh = 0; T1 > 0; T1--, T0++, sh += 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint32_t)T0) << sh;
-        }
-        env->gpr[dst] = tmp;
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lsw_le_64, MEMSUFFIX) (int dst)
-{
-    uint32_t tmp;
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst++] = glue(ldlr, MEMSUFFIX)(T0);
         if (unlikely(dst == 32))
             dst = 0;
     }
     if (unlikely(T1 != 0)) {
         tmp = 0;
         for (sh = 0; T1 > 0; T1--, T0++, sh += 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint64_t)T0) << sh;
+            tmp |= glue(ldub, MEMSUFFIX)(T0) << sh;
         }
         env->gpr[dst] = tmp;
     }
 }
-#endif
 
 void glue(do_stsw_le, MEMSUFFIX) (int src)
 {
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint32_t)T0, env->gpr[src++]);
-        if (unlikely(src == 32))
-            src = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        for (sh = 0; T1 > 0; T1--, T0++, sh += 8)
-            glue(stb, MEMSUFFIX)((uint32_t)T0, (env->gpr[src] >> sh) & 0xFF);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stsw_le_64, MEMSUFFIX) (int src)
-{
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint64_t)T0, env->gpr[src++]);
+        glue(stlr, MEMSUFFIX)(T0, env->gpr[src++]);
         if (unlikely(src == 32))
             src = 0;
     }
     if (unlikely(T1 != 0)) {
         for (sh = 0; T1 > 0; T1--, T0++, sh += 8)
-            glue(stb, MEMSUFFIX)((uint64_t)T0, (env->gpr[src] >> sh) & 0xFF);
+            glue(stb, MEMSUFFIX)(T0, (env->gpr[src] >> sh) & 0xFF);
     }
 }
-#endif
 
 /* Instruction cache invalidation helper */
 void glue(do_icbi, MEMSUFFIX) (void)
@@ -252,27 +124,11 @@ void glue(do_icbi, MEMSUFFIX) (void)
      * (not a fetch) by the MMU. To be sure it will be so,
      * do the load "by hand".
      */
-    tmp = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-    T0 &= ~(env->icache_line_size - 1);
-    tb_invalidate_page_range((uint32_t)T0,
-                             (uint32_t)(T0 + env->icache_line_size));
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_icbi_64, MEMSUFFIX) (void)
-{
-    uint64_t tmp;
-    /* Invalidate one cache line :
-     * PowerPC specification says this is to be treated like a load
-     * (not a fetch) by the MMU. To be sure it will be so,
-     * do the load "by hand".
-     */
-    tmp = glue(ldq, MEMSUFFIX)((uint64_t)T0);
+    tmp = glue(ldl, MEMSUFFIX)(T0);
     T0 &= ~(env->icache_line_size - 1);
-    tb_invalidate_page_range((uint64_t)T0,
-                             (uint64_t)(T0 + env->icache_line_size));
+    /* We assume it would not wrap around 2^32 on 32-bit targets */
+    tb_invalidate_page_range(T0, T0 + env->icache_line_size);
 }
-#endif
 
 void glue(do_dcbz, MEMSUFFIX) (void)
 {
@@ -281,90 +137,43 @@ void glue(do_dcbz, MEMSUFFIX) (void)
     /* XXX: should be 970 specific (?) */
     if (((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1)
         dcache_line_size = 32;
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x00), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x04), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x08), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x0C), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x10), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x14), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x18), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x1C), 0);
-    if (dcache_line_size >= 64) {
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x20UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x24UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x28UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x2CUL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x30UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x34UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x38UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x3CUL), 0);
-        if (dcache_line_size >= 128) {
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x40UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x44UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x48UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x4CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x50UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x54UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x58UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x5CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x60UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x64UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x68UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x6CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x70UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x74UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x78UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x7CUL), 0);
-        }
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_dcbz_64, MEMSUFFIX) (void)
-{
-    int dcache_line_size = env->dcache_line_size;
-
-    /* XXX: should be 970 specific (?) */
-    if (((env->spr[SPR_970_HID5] >> 6) & 0x3) == 0x2)
-        dcache_line_size = 32;
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x00), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x04), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x08), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x0C), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x10), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x14), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x18), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x1C), 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x00, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x04, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x08, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x0C, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x10, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x14, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x18, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x1C, 0);
     if (dcache_line_size >= 64) {
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x20UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x24UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x28UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x2CUL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x30UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x34UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x38UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x3CUL), 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x20UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x24UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x28UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x2CUL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x30UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x34UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x38UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x3CUL, 0);
         if (dcache_line_size >= 128) {
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x40UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x44UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x48UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x4CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x50UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x54UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x58UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x5CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x60UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x64UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x68UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x6CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x70UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x74UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x78UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x7CUL), 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x40UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x44UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x48UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x4CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x50UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x54UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x58UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x5CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x60UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x64UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x68UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x6CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x70UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x74UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x78UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x7CUL, 0);
         }
     }
 }
-#endif
 
 /* PowerPC 601 specific instructions (POWER bridge) */
 // XXX: to be tested
@@ -400,26 +209,6 @@ void glue(do_POWER2_lfq, MEMSUFFIX) (voi
     FT1 = glue(ldfq, MEMSUFFIX)((uint32_t)(T0 + 4));
 }
 
-static always_inline double glue(ldfqr, MEMSUFFIX) (target_ulong EA)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = glue(ldfq, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-
-    return u.d;
-}
-
 void glue(do_POWER2_lfq_le, MEMSUFFIX) (void)
 {
     FT0 = glue(ldfqr, MEMSUFFIX)((uint32_t)(T0 + 4));
@@ -432,25 +221,6 @@ void glue(do_POWER2_stfq, MEMSUFFIX) (vo
     glue(stfq, MEMSUFFIX)((uint32_t)(T0 + 4), FT1);
 }
 
-static always_inline void glue(stfqr, MEMSUFFIX) (target_ulong EA, double d)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = d;
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-    glue(stfq, MEMSUFFIX)(EA, u.d);
-}
-
 void glue(do_POWER2_stfq_le, MEMSUFFIX) (void)
 {
     glue(stfqr, MEMSUFFIX)((uint32_t)(T0 + 4), FT0);
Index: target-ppc/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_mem.h,v
retrieving revision 1.22
diff -u -d -d -p -r1.22 op_mem.h
--- target-ppc/op_mem.h	7 Oct 2007 18:19:25 -0000	1.22
+++ target-ppc/op_mem.h	13 Oct 2007 12:30:28 -0000
@@ -18,82 +18,15 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-static always_inline uint16_t glue(ld16r, MEMSUFFIX) (target_ulong EA)
-{
-    uint16_t tmp = glue(lduw, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF00) >> 8) | ((tmp & 0x00FF) << 8);
-}
-
-static always_inline int32_t glue(ld16rs, MEMSUFFIX) (target_ulong EA)
-{
-    int16_t tmp = glue(lduw, MEMSUFFIX)(EA);
-    return (int16_t)((tmp & 0xFF00) >> 8) | ((tmp & 0x00FF) << 8);
-}
-
-static always_inline uint32_t glue(ld32r, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF000000) >> 24) | ((tmp & 0x00FF0000) >> 8) |
-        ((tmp & 0x0000FF00) << 8) | ((tmp & 0x000000FF) << 24);
-}
-
-#if defined(TARGET_PPC64) || defined(TARGET_PPCEMB)
-static always_inline uint64_t glue(ld64r, MEMSUFFIX) (target_ulong EA)
-{
-    uint64_t tmp = glue(ldq, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF00000000000000ULL) >> 56) |
-        ((tmp & 0x00FF000000000000ULL) >> 40) |
-        ((tmp & 0x0000FF0000000000ULL) >> 24) |
-        ((tmp & 0x000000FF00000000ULL) >> 8) |
-        ((tmp & 0x00000000FF000000ULL) << 8) |
-        ((tmp & 0x0000000000FF0000ULL) << 24) |
-        ((tmp & 0x000000000000FF00ULL) << 40) |
-        ((tmp & 0x00000000000000FFULL) << 54);
-}
-#endif
-
 #if defined(TARGET_PPC64)
 static always_inline int64_t glue(ldsl, MEMSUFFIX) (target_ulong EA)
 {
     return (int32_t)glue(ldl, MEMSUFFIX)(EA);
 }
 
-static always_inline int64_t glue(ld32rs, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return (int32_t)((tmp & 0xFF000000) >> 24) | ((tmp & 0x00FF0000) >> 8) |
-        ((tmp & 0x0000FF00) << 8) | ((tmp & 0x000000FF) << 24);
-}
-#endif
-
-static always_inline void glue(st16r, MEMSUFFIX) (target_ulong EA,
-                                                  uint16_t data)
-{
-    uint16_t tmp = ((data & 0xFF00) >> 8) | ((data & 0x00FF) << 8);
-    glue(stw, MEMSUFFIX)(EA, tmp);
-}
-
-static always_inline void glue(st32r, MEMSUFFIX) (target_ulong EA,
-                                                  uint32_t data)
-{
-    uint32_t tmp = ((data & 0xFF000000) >> 24) | ((data & 0x00FF0000) >> 8) |
-        ((data & 0x0000FF00) << 8) | ((data & 0x000000FF) << 24);
-    glue(stl, MEMSUFFIX)(EA, tmp);
-}
-
-#if defined(TARGET_PPC64) || defined(TARGET_PPCEMB)
-static always_inline void glue(st64r, MEMSUFFIX) (target_ulong EA,
-                                                  uint64_t data)
+static always_inline int64_t glue(ldslr, MEMSUFFIX) (target_ulong EA)
 {
-    uint64_t tmp = ((data & 0xFF00000000000000ULL) >> 56) |
-        ((data & 0x00FF000000000000ULL) >> 40) |
-        ((data & 0x0000FF0000000000ULL) >> 24) |
-        ((data & 0x000000FF00000000ULL) >> 8) |
-        ((data & 0x00000000FF000000ULL) << 8) |
-        ((data & 0x0000000000FF0000ULL) << 24) |
-        ((data & 0x000000000000FF00ULL) << 40) |
-        ((data & 0x00000000000000FFULL) << 56);
-    glue(stq, MEMSUFFIX)(EA, tmp);
+    return (int32_t)glue(ldlr, MEMSUFFIX)(EA);
 }
 #endif
 
@@ -130,6 +63,7 @@ void OPPROTO glue(glue(glue(op_st, name)
 }
 #endif
 
+/* Native-endian fixed-point memory loads                                    */
 PPC_LD_OP(bz, ldub);
 PPC_LD_OP(ha, ldsw);
 PPC_LD_OP(hz, lduw);
@@ -145,20 +79,21 @@ PPC_LD_OP_64(hz, lduw);
 PPC_LD_OP_64(wz, ldl);
 #endif
 
-PPC_LD_OP(ha_le, ld16rs);
-PPC_LD_OP(hz_le, ld16r);
-PPC_LD_OP(wz_le, ld32r);
+/* Reverse-endian fixed-point memory loads                                   */
+PPC_LD_OP(ha_le, ldswr);
+PPC_LD_OP(hz_le, lduwr);
+PPC_LD_OP(wz_le, ldlr);
 #if defined(TARGET_PPC64)
-PPC_LD_OP(d_le, ld64r);
-PPC_LD_OP(wa_le, ld32rs);
-PPC_LD_OP_64(d_le, ld64r);
-PPC_LD_OP_64(wa_le, ld32rs);
-PPC_LD_OP_64(ha_le, ld16rs);
-PPC_LD_OP_64(hz_le, ld16r);
-PPC_LD_OP_64(wz_le, ld32r);
+PPC_LD_OP(d_le, ldqr);
+PPC_LD_OP(wa_le, ldslr);
+PPC_LD_OP_64(d_le, ldqr);
+PPC_LD_OP_64(wa_le, ldslr);
+PPC_LD_OP_64(ha_le, ldswr);
+PPC_LD_OP_64(hz_le, lduwr);
+PPC_LD_OP_64(wz_le, ldlr);
 #endif
 
-/***                              Integer store                            ***/
+/* Native-endian fixed-point memory stores                                   */
 PPC_ST_OP(b, stb);
 PPC_ST_OP(h, stw);
 PPC_ST_OP(w, stl);
@@ -170,27 +105,29 @@ PPC_ST_OP_64(h, stw);
 PPC_ST_OP_64(w, stl);
 #endif
 
-PPC_ST_OP(h_le, st16r);
-PPC_ST_OP(w_le, st32r);
+/* Reverse-endian fixed-point memory stores                                  */
+PPC_ST_OP(h_le, stwr);
+PPC_ST_OP(w_le, stlr);
 #if defined(TARGET_PPC64)
-PPC_ST_OP(d_le, st64r);
-PPC_ST_OP_64(d_le, st64r);
-PPC_ST_OP_64(h_le, st16r);
-PPC_ST_OP_64(w_le, st32r);
+PPC_ST_OP(d_le, stqr);
+PPC_ST_OP_64(d_le, stqr);
+PPC_ST_OP_64(h_le, stwr);
+PPC_ST_OP_64(w_le, stlr);
 #endif
 
-/***                Integer load and store with byte reverse               ***/
-PPC_LD_OP(hbr, ld16r);
-PPC_LD_OP(wbr, ld32r);
-PPC_ST_OP(hbr, st16r);
-PPC_ST_OP(wbr, st32r);
+/* Native-endian fixed-point loads and stores with byte-reverse              */
+PPC_LD_OP(hbr, lduwr);
+PPC_LD_OP(wbr, ldlr);
+PPC_ST_OP(hbr, stwr);
+PPC_ST_OP(wbr, stlr);
 #if defined(TARGET_PPC64)
-PPC_LD_OP_64(hbr, ld16r);
-PPC_LD_OP_64(wbr, ld32r);
-PPC_ST_OP_64(hbr, st16r);
-PPC_ST_OP_64(wbr, st32r);
+PPC_LD_OP_64(hbr, lduwr);
+PPC_LD_OP_64(wbr, ldlr);
+PPC_ST_OP_64(hbr, stwr);
+PPC_ST_OP_64(wbr, stlr);
 #endif
 
+/* Reverse-endian fixed-point loads and stores with byte-reverse             */
 PPC_LD_OP(hbr_le, lduw);
 PPC_LD_OP(wbr_le, ldl);
 PPC_ST_OP(hbr_le, stw);
@@ -202,88 +139,76 @@ PPC_ST_OP_64(hbr_le, stw);
 PPC_ST_OP_64(wbr_le, stl);
 #endif
 
-/***                    Integer load and store multiple                    ***/
+/* Native-endian fixed-point loads and stores multiple                       */
 void OPPROTO glue(op_lmw, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_lmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_lmw_64, MEMSUFFIX) (void)
-{
-    glue(do_lmw_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
-void OPPROTO glue(op_lmw_le, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw, MEMSUFFIX) (void)
 {
-    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_stmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_lmw_le_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_lmw_64, MEMSUFFIX) (void)
 {
-    glue(do_lmw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_lmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
-#endif
 
-void OPPROTO glue(op_stmw, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw_64, MEMSUFFIX) (void)
 {
     glue(do_stmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
+#endif
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_stmw_64, MEMSUFFIX) (void)
+/* Reverse-endian fixed-point loads and stores multiple                      */
+void OPPROTO glue(op_lmw_le, MEMSUFFIX) (void)
 {
-    glue(do_stmw_64, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
-#endif
 
 void OPPROTO glue(op_stmw_le, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_stmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_stmw_le_64, MEMSUFFIX) (void)
-{
-    glue(do_stmw_le_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
-/***                    Integer load and store strings                     ***/
-void OPPROTO glue(op_lswi, MEMSUFFIX) (void)
+void OPPROTO glue(op_lmw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw, MEMSUFFIX)(PARAM1);
+    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_lswi_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw_64, MEMSUFFIX)(PARAM1);
+    glue(do_stmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
 
-void OPPROTO glue(op_lswi_le, MEMSUFFIX) (void)
+/* Native-endian loads and stores string                                     */
+void OPPROTO glue(op_lswi, MEMSUFFIX) (void)
 {
-    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    glue(do_lsw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_lswi_le_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_lswi_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw_le_64, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_lsw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
@@ -303,6 +228,7 @@ void OPPROTO glue(op_lswx, MEMSUFFIX) (v
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
+            T0 = (uint32_t)T0;
             glue(do_lsw, MEMSUFFIX)(PARAM1);
         }
     }
@@ -320,13 +246,44 @@ void OPPROTO glue(op_lswx_64, MEMSUFFIX)
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
-            glue(do_lsw_64, MEMSUFFIX)(PARAM1);
+            glue(do_lsw, MEMSUFFIX)(PARAM1);
         }
     }
     RETURN();
 }
 #endif
 
+void OPPROTO glue(op_stsw, MEMSUFFIX) (void)
+{
+    T0 = (uint32_t)T0;
+    glue(do_stsw, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+
+#if defined(TARGET_PPC64)
+void OPPROTO glue(op_stsw_64, MEMSUFFIX) (void)
+{
+    glue(do_stsw, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+#endif
+
+/* Reverse-endian loads and stores string                                    */
+void OPPROTO glue(op_lswi_le, MEMSUFFIX) (void)
+{
+    T0 = (uint32_t)T0;
+    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+
+#if defined(TARGET_PPC64)
+void OPPROTO glue(op_lswi_le_64, MEMSUFFIX) (void)
+{
+    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+#endif
+
 void OPPROTO glue(op_lswx_le, MEMSUFFIX) (void)
 {
     /* Note: T1 comes from xer_bc then no cast is needed */
@@ -337,6 +294,7 @@ void OPPROTO glue(op_lswx_le, MEMSUFFIX)
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
+            T0 = (uint32_t)T0;
             glue(do_lsw_le, MEMSUFFIX)(PARAM1);
         }
     }
@@ -354,29 +312,16 @@ void OPPROTO glue(op_lswx_le_64, MEMSUFF
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
-            glue(do_lsw_le_64, MEMSUFFIX)(PARAM1);
+            glue(do_lsw_le, MEMSUFFIX)(PARAM1);
         }
     }
     RETURN();
 }
 #endif
 
-void OPPROTO glue(op_stsw, MEMSUFFIX) (void)
-{
-    glue(do_stsw, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_stsw_64, MEMSUFFIX) (void)
-{
-    glue(do_stsw_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
 void OPPROTO glue(op_stsw_le, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_stsw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
@@ -384,7 +329,7 @@ void OPPROTO glue(op_stsw_le, MEMSUFFIX)
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_stsw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_stsw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_stsw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
@@ -432,38 +377,9 @@ PPC_STF_OP_64(fs, stfs);
 PPC_STF_OP_64(fiwx, stfiwx);
 #endif
 
-static always_inline void glue(stfqr, MEMSUFFIX) (target_ulong EA, double d)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = d;
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-    glue(stfq, MEMSUFFIX)(EA, u.d);
-}
-
 static always_inline void glue(stfsr, MEMSUFFIX) (target_ulong EA, double d)
 {
-    union {
-        float f;
-        uint32_t u;
-    } u;
-
-    u.f = float64_to_float32(d, &env->fp_status);
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-    glue(stfl, MEMSUFFIX)(EA, u.f);
+    glue(stflr, MEMSUFFIX)(EA, float64_to_float32(d, &env->fp_status));
 }
 
 static always_inline void glue(stfiwxr, MEMSUFFIX) (target_ulong EA, double d)
@@ -475,11 +391,7 @@ static always_inline void glue(stfiwxr, 
 
     /* Store the low order 32 bits without any conversion */
     u.d = d;
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-    glue(stl, MEMSUFFIX)(EA, u.u);
+    glue(stlr, MEMSUFFIX)(EA, u.u);
 }
 
 PPC_STF_OP(fd_le, stfqr);
@@ -520,40 +432,9 @@ PPC_LDF_OP_64(fd, ldfq);
 PPC_LDF_OP_64(fs, ldfs);
 #endif
 
-static always_inline double glue(ldfqr, MEMSUFFIX) (target_ulong EA)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = glue(ldfq, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-
-    return u.d;
-}
-
 static always_inline double glue(ldfsr, MEMSUFFIX) (target_ulong EA)
 {
-    union {
-        float f;
-        uint32_t u;
-    } u;
-
-    u.f = glue(ldfl, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-
-    return float32_to_float64(u.f, &env->fp_status);
+    return float32_to_float64(glue(ldflr, MEMSUFFIX)(EA), &env->fp_status);
 }
 
 PPC_LDF_OP(fd_le, ldfqr);
@@ -615,7 +496,7 @@ void OPPROTO glue(op_lwarx_le, MEMSUFFIX
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldlr, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -627,7 +508,7 @@ void OPPROTO glue(op_lwarx_le_64, MEMSUF
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldlr, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -638,7 +519,7 @@ void OPPROTO glue(op_ldarx_le, MEMSUFFIX
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld64r, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldqr, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -649,7 +530,7 @@ void OPPROTO glue(op_ldarx_le_64, MEMSUF
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld64r, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldqr, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -731,7 +612,7 @@ void OPPROTO glue(op_stwcx_le, MEMSUFFIX
         if (unlikely(env->reserve != (uint32_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st32r, MEMSUFFIX)((uint32_t)T0, T1);
+            glue(stlr, MEMSUFFIX)((uint32_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -748,7 +629,7 @@ void OPPROTO glue(op_stwcx_le_64, MEMSUF
         if (unlikely(env->reserve != (uint64_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st32r, MEMSUFFIX)((uint64_t)T0, T1);
+            glue(stlr, MEMSUFFIX)((uint64_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -764,7 +645,7 @@ void OPPROTO glue(op_stdcx_le, MEMSUFFIX
         if (unlikely(env->reserve != (uint32_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st64r, MEMSUFFIX)((uint32_t)T0, T1);
+            glue(stqr, MEMSUFFIX)((uint32_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -780,7 +661,7 @@ void OPPROTO glue(op_stdcx_le_64, MEMSUF
         if (unlikely(env->reserve != (uint64_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st64r, MEMSUFFIX)((uint64_t)T0, T1);
+            glue(stqr, MEMSUFFIX)((uint64_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -862,6 +743,7 @@ void OPPROTO glue(op_dcbz_l128, MEMSUFFI
 
 void OPPROTO glue(op_dcbz, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_dcbz, MEMSUFFIX)();
     RETURN();
 }
@@ -940,7 +822,7 @@ void OPPROTO glue(op_dcbz_l128_64, MEMSU
 
 void OPPROTO glue(op_dcbz_64, MEMSUFFIX) (void)
 {
-    glue(do_dcbz_64, MEMSUFFIX)();
+    glue(do_dcbz, MEMSUFFIX)();
     RETURN();
 }
 #endif
@@ -948,6 +830,7 @@ void OPPROTO glue(op_dcbz_64, MEMSUFFIX)
 /* Instruction cache block invalidate */
 void OPPROTO glue(op_icbi, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_icbi, MEMSUFFIX)();
     RETURN();
 }
@@ -955,7 +838,7 @@ void OPPROTO glue(op_icbi, MEMSUFFIX) (v
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_icbi_64, MEMSUFFIX) (void)
 {
-    glue(do_icbi_64, MEMSUFFIX)();
+    glue(do_icbi, MEMSUFFIX)();
     RETURN();
 }
 #endif
@@ -991,28 +874,28 @@ void OPPROTO glue(op_ecowx_64, MEMSUFFIX
 
 void OPPROTO glue(op_eciwx_le, MEMSUFFIX) (void)
 {
-    T1 = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
+    T1 = glue(ldlr, MEMSUFFIX)((uint32_t)T0);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_eciwx_le_64, MEMSUFFIX) (void)
 {
-    T1 = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+    T1 = glue(ldlr, MEMSUFFIX)((uint64_t)T0);
     RETURN();
 }
 #endif
 
 void OPPROTO glue(op_ecowx_le, MEMSUFFIX) (void)
 {
-    glue(st32r, MEMSUFFIX)((uint32_t)T0, T1);
+    glue(stlr, MEMSUFFIX)((uint32_t)T0, T1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_ecowx_le_64, MEMSUFFIX) (void)
 {
-    glue(st32r, MEMSUFFIX)((uint64_t)T0, T1);
+    glue(stlr, MEMSUFFIX)((uint64_t)T0, T1);
     RETURN();
 }
 #endif
@@ -1070,8 +953,8 @@ void OPPROTO glue(op_vr_lvx, MEMSUFFIX) 
 
 void OPPROTO glue(op_vr_lvx_le, MEMSUFFIX) (void)
 {
-    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint32_t)T0);
-    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint32_t)T0 + 8);
+    AVR0.u64[VR_DWORD1] = glue(ldqr, MEMSUFFIX)((uint32_t)T0);
+    AVR0.u64[VR_DWORD0] = glue(ldqr, MEMSUFFIX)((uint32_t)T0 + 8);
 }
 
 void OPPROTO glue(op_vr_stvx, MEMSUFFIX) (void)
@@ -1082,8 +965,8 @@ void OPPROTO glue(op_vr_stvx, MEMSUFFIX)
 
 void OPPROTO glue(op_vr_stvx_le, MEMSUFFIX) (void)
 {
-    glue(stq, MEMSUFFIX)((uint32_t)T0, AVR0.u64[VR_DWORD1]);
-    glue(stq, MEMSUFFIX)((uint32_t)T0 + 8, AVR0.u64[VR_DWORD0]);
+    glue(stqr, MEMSUFFIX)((uint32_t)T0, AVR0.u64[VR_DWORD1]);
+    glue(stqr, MEMSUFFIX)((uint32_t)T0 + 8, AVR0.u64[VR_DWORD0]);
 }
 
 #if defined(TARGET_PPC64)
@@ -1095,8 +978,8 @@ void OPPROTO glue(op_vr_lvx_64, MEMSUFFI
 
 void OPPROTO glue(op_vr_lvx_le_64, MEMSUFFIX) (void)
 {
-    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint64_t)T0);
-    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint64_t)T0 + 8);
+    AVR0.u64[VR_DWORD1] = glue(ldqr, MEMSUFFIX)((uint64_t)T0);
+    AVR0.u64[VR_DWORD0] = glue(ldqr, MEMSUFFIX)((uint64_t)T0 + 8);
 }
 
 void OPPROTO glue(op_vr_stvx_64, MEMSUFFIX) (void)
@@ -1107,8 +990,8 @@ void OPPROTO glue(op_vr_stvx_64, MEMSUFF
 
 void OPPROTO glue(op_vr_stvx_le_64, MEMSUFFIX) (void)
 {
-    glue(stq, MEMSUFFIX)((uint64_t)T0, AVR0.u64[VR_DWORD1]);
-    glue(stq, MEMSUFFIX)((uint64_t)T0 + 8, AVR0.u64[VR_DWORD0]);
+    glue(stqr, MEMSUFFIX)((uint64_t)T0, AVR0.u64[VR_DWORD1]);
+    glue(stqr, MEMSUFFIX)((uint64_t)T0 + 8, AVR0.u64[VR_DWORD0]);
 }
 #endif
 #undef VR_DWORD0
@@ -1163,8 +1046,8 @@ _PPC_SPE_ST_OP(name, op)
 #if !defined(TARGET_PPC64)
 PPC_SPE_LD_OP(dd, ldq);
 PPC_SPE_ST_OP(dd, stq);
-PPC_SPE_LD_OP(dd_le, ld64r);
-PPC_SPE_ST_OP(dd_le, st64r);
+PPC_SPE_LD_OP(dd_le, ldqr);
+PPC_SPE_ST_OP(dd_le, stqr);
 #endif
 static always_inline uint64_t glue(spe_ldw, MEMSUFFIX) (target_ulong EA)
 {
@@ -1184,16 +1067,16 @@ PPC_SPE_ST_OP(dw, spe_stdw);
 static always_inline uint64_t glue(spe_ldw_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld32r, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ld32r, MEMSUFFIX)(EA + 4);
+    ret = (uint64_t)glue(ldlr, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(ldlr, MEMSUFFIX)(EA + 4);
     return ret;
 }
 PPC_SPE_LD_OP(dw_le, spe_ldw_le);
 static always_inline void glue(spe_stdw_le, MEMSUFFIX) (target_ulong EA,
                                                         uint64_t data)
 {
-    glue(st32r, MEMSUFFIX)(EA, data >> 32);
-    glue(st32r, MEMSUFFIX)(EA + 4, data);
+    glue(stlr, MEMSUFFIX)(EA, data >> 32);
+    glue(stlr, MEMSUFFIX)(EA + 4, data);
 }
 PPC_SPE_ST_OP(dw_le, spe_stdw_le);
 static always_inline uint64_t glue(spe_ldh, MEMSUFFIX) (target_ulong EA)
@@ -1218,20 +1101,20 @@ PPC_SPE_ST_OP(dh, spe_stdh);
 static always_inline uint64_t glue(spe_ldh_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 48;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2) << 32;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 4) << 16;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 6);
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 48;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2) << 32;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 4) << 16;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 6);
     return ret;
 }
 PPC_SPE_LD_OP(dh_le, spe_ldh_le);
 static always_inline void glue(spe_stdh_le, MEMSUFFIX) (target_ulong EA,
                                                         uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 48);
-    glue(st16r, MEMSUFFIX)(EA + 2, data >> 32);
-    glue(st16r, MEMSUFFIX)(EA + 4, data >> 16);
-    glue(st16r, MEMSUFFIX)(EA + 6, data);
+    glue(stwr, MEMSUFFIX)(EA, data >> 48);
+    glue(stwr, MEMSUFFIX)(EA + 2, data >> 32);
+    glue(stwr, MEMSUFFIX)(EA + 4, data >> 16);
+    glue(stwr, MEMSUFFIX)(EA + 6, data);
 }
 PPC_SPE_ST_OP(dh_le, spe_stdh_le);
 static always_inline uint64_t glue(spe_lwhe, MEMSUFFIX) (target_ulong EA)
@@ -1252,16 +1135,16 @@ PPC_SPE_ST_OP(whe, spe_stwhe);
 static always_inline uint64_t glue(spe_lwhe_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 48;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2) << 16;
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 48;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2) << 16;
     return ret;
 }
 PPC_SPE_LD_OP(whe_le, spe_lwhe_le);
 static always_inline void glue(spe_stwhe_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 48);
-    glue(st16r, MEMSUFFIX)(EA + 2, data >> 16);
+    glue(stwr, MEMSUFFIX)(EA, data >> 48);
+    glue(stwr, MEMSUFFIX)(EA + 2, data >> 16);
 }
 PPC_SPE_ST_OP(whe_le, spe_stwhe_le);
 static always_inline uint64_t glue(spe_lwhou, MEMSUFFIX) (target_ulong EA)
@@ -1290,24 +1173,24 @@ PPC_SPE_ST_OP(who, spe_stwho);
 static always_inline uint64_t glue(spe_lwhou_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2);
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2);
     return ret;
 }
 PPC_SPE_LD_OP(whou_le, spe_lwhou_le);
 static always_inline uint64_t glue(spe_lwhos_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = ((uint64_t)((int32_t)glue(ld16rs, MEMSUFFIX)(EA))) << 32;
-    ret |= (uint64_t)((int32_t)glue(ld16rs, MEMSUFFIX)(EA + 2));
+    ret = ((uint64_t)((int32_t)glue(ldswr, MEMSUFFIX)(EA))) << 32;
+    ret |= (uint64_t)((int32_t)glue(ldswr, MEMSUFFIX)(EA + 2));
     return ret;
 }
 PPC_SPE_LD_OP(whos_le, spe_lwhos_le);
 static always_inline void glue(spe_stwho_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 32);
-    glue(st16r, MEMSUFFIX)(EA + 2, data);
+    glue(stwr, MEMSUFFIX)(EA, data >> 32);
+    glue(stwr, MEMSUFFIX)(EA + 2, data);
 }
 PPC_SPE_ST_OP(who_le, spe_stwho_le);
 #if !defined(TARGET_PPC64)
@@ -1320,7 +1203,7 @@ PPC_SPE_ST_OP(wwo, spe_stwwo);
 static always_inline void glue(spe_stwwo_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st32r, MEMSUFFIX)(EA, data);
+    glue(stlr, MEMSUFFIX)(EA, data);
 }
 PPC_SPE_ST_OP(wwo_le, spe_stwwo_le);
 #endif
@@ -1334,7 +1217,7 @@ PPC_SPE_LD_OP(h, spe_lh);
 static always_inline uint64_t glue(spe_lh_le, MEMSUFFIX) (target_ulong EA)
 {
     uint16_t tmp;
-    tmp = glue(ld16r, MEMSUFFIX)(EA);
+    tmp = glue(lduwr, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 48) | ((uint64_t)tmp << 16);
 }
 PPC_SPE_LD_OP(h_le, spe_lh_le);
@@ -1349,7 +1232,7 @@ static always_inline
 uint64_t glue(spe_lwwsplat_le, MEMSUFFIX) (target_ulong EA)
 {
     uint32_t tmp;
-    tmp = glue(ld32r, MEMSUFFIX)(EA);
+    tmp = glue(ldlr, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 32) | (uint64_t)tmp;
 }
 PPC_SPE_LD_OP(wwsplat_le, spe_lwwsplat_le);
@@ -1369,9 +1252,9 @@ uint64_t glue(spe_lwhsplat_le, MEMSUFFIX
 {
     uint64_t ret;
     uint16_t tmp;
-    tmp = glue(ld16r, MEMSUFFIX)(EA);
+    tmp = glue(lduwr, MEMSUFFIX)(EA);
     ret = ((uint64_t)tmp << 48) | ((uint64_t)tmp << 32);
-    tmp = glue(ld16r, MEMSUFFIX)(EA + 2);
+    tmp = glue(lduwr, MEMSUFFIX)(EA + 2);
     ret |= ((uint64_t)tmp << 16) | (uint64_t)tmp;
     return ret;
 }
Index: target-sh4/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-sh4/exec.h,v
retrieving revision 1.5
diff -u -d -d -p -r1.5 exec.h
--- target-sh4/exec.h	16 Sep 2007 21:08:05 -0000	1.5
+++ target-sh4/exec.h	13 Oct 2007 12:30:28 -0000
@@ -48,6 +48,9 @@ static inline int cpu_halted(CPUState *e
 
 #ifndef CONFIG_USER_ONLY
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 #define RETURN() __asm__ __volatile__("")
Index: target-sh4/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-sh4/op_helper.c,v
retrieving revision 1.4
diff -u -d -d -p -r1.4 op_helper.c
--- target-sh4/op_helper.c	16 Sep 2007 21:08:05 -0000	1.4
+++ target-sh4/op_helper.c	13 Oct 2007 12:30:28 -0000
@@ -30,6 +30,21 @@ void do_raise_exception(void)
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -41,6 +56,7 @@ void do_raise_exception(void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 void tlb_fill(target_ulong addr, int is_write, int is_user, void *retaddr)
 {
Index: target-sparc/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/exec.h,v
retrieving revision 1.21
diff -u -d -d -p -r1.21 exec.h
--- target-sparc/exec.h	30 Sep 2007 19:38:11 -0000	1.21
+++ target-sparc/exec.h	13 Oct 2007 12:30:28 -0000
@@ -100,6 +100,9 @@ void do_rdpsr();
 /* XXX: move that to a generic header */
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 static inline void env_to_regs(void)
Index: target-sparc/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/op_helper.c,v
retrieving revision 1.41
diff -u -d -d -p -r1.41 op_helper.c
--- target-sparc/op_helper.c	1 Oct 2007 17:07:58 -0000	1.41
+++ target-sparc/op_helper.c	13 Oct 2007 12:30:28 -0000
@@ -1497,6 +1497,21 @@ static void do_unaligned_access(target_u
 #define ALIGNED_ONLY
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -1508,6 +1523,7 @@ static void do_unaligned_access(target_u
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 static void do_unaligned_access(target_ulong addr, int is_write, int is_user,
                                 void *retaddr)

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] RFC: reverse-endian softmmu memory accessors
  2007-10-13 10:47 ` Blue Swirl
  2007-10-13 12:43   ` J. Mayer
@ 2007-10-13 13:02   ` Thiemo Seufer
  1 sibling, 0 replies; 20+ messages in thread
From: Thiemo Seufer @ 2007-10-13 13:02 UTC (permalink / raw)
  To: Blue Swirl; +Cc: qemu-devel

Blue Swirl wrote:
> On 10/13/07, J. Mayer <l_indien@magic.fr> wrote:
> > The problem:
> > some CPU architectures, namely PowerPC and maybe others, offers
> > facilities to access the memory or I/O in the reverse endianness, ie
> > little-endian instead of big-endian for PowerPC, or provide instruction
> > to make memory accesses in the "reverse-endian". This is implemented as
> > a global flag on some CPU. This case is already handled by the PowerPC
> > emulation but it is far from being optimal. Some other implementations
> > allow the OS to store a "reverse-endian" flag in the TLB or the segment
> > descriptors, thus providing per-page or per-segment endianness control.
> > This is mostly used to ease driver migration from a PC platform to
> > PowerPC without taking any care of the device endianness in the driver
> > code (yes, this is bad...).
> 
> Nice, this may be useful for Sparc64. It has a global CPU flag for
> endianness, individual pages can be marked as reverse endian, and
> finally there are instructions that access memory in reverse endian.
> The end result is a XOR of all these reverses. Though I don't know if
> any of these features are used at all.

Likewise for the MIPS reverse endianness global flag.

> Other memory access functions could be merged too. Is the 32 bit load
> with sign extension to 64 bits used in other architectures?

Yes for MIPS.


Thiemo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] RFC: reverse-endian softmmu memory accessors
  2007-10-13 12:43   ` J. Mayer
@ 2007-10-13 13:07     ` Blue Swirl
  2007-10-13 14:17       ` J. Mayer
  0 siblings, 1 reply; 20+ messages in thread
From: Blue Swirl @ 2007-10-13 13:07 UTC (permalink / raw)
  To: qemu-devel

On 10/13/07, J. Mayer <l_indien@magic.fr> wrote:
> On Sat, 2007-10-13 at 13:47 +0300, Blue Swirl wrote:
> > On 10/13/07, J. Mayer <l_indien@magic.fr> wrote:
> > > The problem:
> > > some CPU architectures, namely PowerPC and maybe others, offers
> > > facilities to access the memory or I/O in the reverse endianness, ie
> > > little-endian instead of big-endian for PowerPC, or provide instruction
> > > to make memory accesses in the "reverse-endian". This is implemented as
> > > a global flag on some CPU. This case is already handled by the PowerPC
> > > emulation but it is far from being optimal. Some other implementations
> > > allow the OS to store a "reverse-endian" flag in the TLB or the segment
> > > descriptors, thus providing per-page or per-segment endianness control.
> > > This is mostly used to ease driver migration from a PC platform to
> > > PowerPC without taking any care of the device endianness in the driver
> > > code (yes, this is bad...).
> >
> > Nice, this may be useful for Sparc64. It has a global CPU flag for
> > endianness, individual pages can be marked as reverse endian, and
> > finally there are instructions that access memory in reverse endian.
> > The end result is a XOR of all these reverses. Though I don't know if
> > any of these features are used at all.
>
> I realized that I/O accesses for reverse-endian pages were not correct
> in the softmmu_template.h header. This new version fixes this. It also
> removes duplicated code in the case of unaligned accesses in a
> reverse-endian page.

I think the 64-bit access case is not handled correctly, but to solve that
it would be nice to extend the current I/O access system to 64 bits.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] RFC: reverse-endian softmmu memory accessors
  2007-10-13 13:07     ` Blue Swirl
@ 2007-10-13 14:17       ` J. Mayer
  2007-10-13 22:07         ` J. Mayer
  0 siblings, 1 reply; 20+ messages in thread
From: J. Mayer @ 2007-10-13 14:17 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 2425 bytes --]

On Sat, 2007-10-13 at 16:07 +0300, Blue Swirl wrote:
> On 10/13/07, J. Mayer <l_indien@magic.fr> wrote:
> > On Sat, 2007-10-13 at 13:47 +0300, Blue Swirl wrote:
> > > On 10/13/07, J. Mayer <l_indien@magic.fr> wrote:
> > > > The problem:
> > > > some CPU architectures, namely PowerPC and maybe others, offers
> > > > facilities to access the memory or I/O in the reverse endianness, ie
> > > > little-endian instead of big-endian for PowerPC, or provide instruction
> > > > to make memory accesses in the "reverse-endian". This is implemented as
> > > > a global flag on some CPU. This case is already handled by the PowerPC
> > > > emulation but it is far from being optimal. Some other implementations
> > > > allow the OS to store a "reverse-endian" flag in the TLB or the segment
> > > > descriptors, thus providing per-page or per-segment endianness control.
> > > > This is mostly used to ease driver migration from a PC platform to
> > > > PowerPC without taking any care of the device endianness in the driver
> > > > code (yes, this is bad...).
> > >
> > > Nice, this may be useful for Sparc64. It has a global CPU flag for
> > > endianness, individual pages can be marked as reverse endian, and
> > > finally there are instructions that access memory in reverse endian.
> > > The end result is a XOR of all these reverses. Though I don't know if
> > > any of these features are used at all.
> >
> > I realized that I/O accesses for reverse-endian pages were not correct
> > in the softmmu_template.h header. This new version fixes this. It also
> > removes duplicated code in the case of unaligned accesses in a
> > reverse-endian page.
> 
> I think 64 bit access case is not handled correctly, but to solve that
> it would be nice to extend the current IO access system to 64 bits.

I think that if it was previously correct, it should still be, but... I
don't know how interesting it is to have 64-bit I/O accesses, as I
don't know if there are real hardware buses that have a 64-bit data path...

Here's another version taking care of your remark about the ldl memory
accessors.
* I replaced all ldl occurrences with ldul
* when TARGET_LONG_BITS == 64, I also added ldsl accessors, and I
started using them in the PowerPC memory access micro-ops.
As a result, the patch is much more invasive than the previous ones.
As far as I can tell, this still does not break the PowerPC or i386 targets.

-- 
J. Mayer <l_indien@magic.fr>
Never organized

[-- Attachment #2: softmmu_reverse_endian.diff --]
[-- Type: text/x-patch, Size: 169080 bytes --]

Index: cpu-all.h
===================================================================
RCS file: /sources/qemu/qemu/cpu-all.h,v
retrieving revision 1.76
diff -u -d -d -p -r1.76 cpu-all.h
--- cpu-all.h	23 Sep 2007 15:28:03 -0000	1.76
+++ cpu-all.h	13 Oct 2007 14:09:54 -0000
@@ -161,7 +161,7 @@ typedef union {
  *
  * endian is:
  * (empty): target cpu endianness or 8 bit access
- *   r    : reversed target cpu endianness (not implemented yet)
+ *   r    : reversed target cpu endianness
  *   be   : big endian (not implemented yet)
  *   le   : little endian (not implemented yet)
  *
@@ -215,7 +215,32 @@ static inline int ldsw_le_p(void *ptr)
 #endif
 }
 
-static inline int ldl_le_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_le_p(void *ptr)
+{
+#ifdef __powerpc__
+    int val;
+    __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+    return (uint32_t)val;
+#else
+    uint8_t *p = ptr;
+    return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
+#endif
+}
+
+static inline int64_t ldsl_le_p(void *ptr)
+{
+#ifdef __powerpc__
+    int val;
+    __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+    return (int32_t)val;
+#else
+    uint8_t *p = ptr;
+    return (int32_t)(p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24));
+#endif
+}
+#else
+static inline int ldul_le_p(void *ptr)
 {
 #ifdef __powerpc__
     int val;
@@ -226,13 +251,14 @@ static inline int ldl_le_p(void *ptr)
     return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
 #endif
 }
+#endif
 
 static inline uint64_t ldq_le_p(void *ptr)
 {
     uint8_t *p = ptr;
     uint32_t v1, v2;
-    v1 = ldl_le_p(p);
-    v2 = ldl_le_p(p + 4);
+    v1 = ldul_le_p(p);
+    v2 = ldul_le_p(p + 4);
     return v1 | ((uint64_t)v2 << 32);
 }
 
@@ -275,7 +301,7 @@ static inline float32 ldfl_le_p(void *pt
         float32 f;
         uint32_t i;
     } u;
-    u.i = ldl_le_p(ptr);
+    u.i = ldul_le_p(ptr);
     return u.f;
 }
 
@@ -292,8 +318,8 @@ static inline void stfl_le_p(void *ptr, 
 static inline float64 ldfq_le_p(void *ptr)
 {
     CPU_DoubleU u;
-    u.l.lower = ldl_le_p(ptr);
-    u.l.upper = ldl_le_p(ptr + 4);
+    u.l.lower = ldul_le_p(ptr);
+    u.l.upper = ldul_le_p(ptr + 4);
     return u.d;
 }
 
@@ -317,11 +343,23 @@ static inline int ldsw_le_p(void *ptr)
     return *(int16_t *)ptr;
 }
 
-static inline int ldl_le_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_le_p(void *ptr)
 {
     return *(uint32_t *)ptr;
 }
 
+static inline int64_t ldsl_le_p(void *ptr)
+{
+    return *(int32_t *)ptr;
+}
+#else
+static inline int ldul_le_p(void *ptr)
+{
+    return *(uint32_t *)ptr;
+}
+#endif
+
 static inline uint64_t ldq_le_p(void *ptr)
 {
     return *(uint64_t *)ptr;
@@ -397,7 +435,38 @@ static inline int ldsw_be_p(void *ptr)
 #endif
 }
 
-static inline int ldl_be_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_be_p(void *ptr) /* unsigned 32-bit BE load */
+{
+#if defined(__i386__) || defined(__x86_64__)
+    int val;
+    asm volatile ("movl %1, %0\n"
+                  "bswap %0\n"
+                  : "=r" (val)
+                  : "m" (*(uint32_t *)ptr));
+    return (uint32_t)val; /* zero-extend into the 64-bit result */
+#else /* byte-wise path: assemble as uint32_t so bit 31 is not sign-extended */
+    uint8_t *b = (uint8_t *) ptr;
+    return (uint32_t)(((uint32_t)b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]);
+#endif
+}
+
+static inline int64_t ldsl_be_p(void *ptr) /* signed 32-bit BE load */
+{
+#if defined(__i386__) || defined(__x86_64__)
+    int val;
+    asm volatile ("movl %1, %0\n"
+                  "bswap %0\n"
+                  : "=r" (val)
+                  : "m" (*(uint32_t *)ptr));
+    return (int32_t)val; /* sign-extend into the 64-bit result */
+#else /* byte-wise path: shift the top byte as uint32_t, then reinterpret as signed */
+    uint8_t *b = (uint8_t *) ptr;
+    return (int32_t)(((uint32_t)b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]);
+#endif
+}
+#else
+static inline int ldul_be_p(void *ptr)
 {
 #if defined(__i386__) || defined(__x86_64__)
     int val;
@@ -411,12 +480,13 @@ static inline int ldl_be_p(void *ptr)
     return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
 #endif
 }
+#endif
 
 static inline uint64_t ldq_be_p(void *ptr)
 {
     uint32_t a,b;
-    a = ldl_be_p(ptr);
-    b = ldl_be_p(ptr+4);
+    a = ldul_be_p(ptr);
+    b = ldul_be_p(ptr+4);
     return (((uint64_t)a<<32)|b);
 }
 
@@ -464,7 +534,7 @@ static inline float32 ldfl_be_p(void *pt
         float32 f;
         uint32_t i;
     } u;
-    u.i = ldl_be_p(ptr);
+    u.i = ldul_be_p(ptr);
     return u.f;
 }
 
@@ -481,8 +551,8 @@ static inline void stfl_be_p(void *ptr, 
 static inline float64 ldfq_be_p(void *ptr)
 {
     CPU_DoubleU u;
-    u.l.upper = ldl_be_p(ptr);
-    u.l.lower = ldl_be_p(ptr + 4);
+    u.l.upper = ldul_be_p(ptr);
+    u.l.lower = ldul_be_p(ptr + 4);
     return u.d;
 }
 
@@ -506,11 +576,23 @@ static inline int ldsw_be_p(void *ptr)
     return *(int16_t *)ptr;
 }
 
-static inline int ldl_be_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_be_p(void *ptr)
 {
     return *(uint32_t *)ptr;
 }
 
+static inline int64_t ldsl_be_p(void *ptr)
+{
+    return *(int32_t *)ptr;
+}
+#else
+static inline int ldul_be_p(void *ptr)
+{
+    return *(uint32_t *)ptr;
+}
+#endif
+
 static inline uint64_t ldq_be_p(void *ptr)
 {
     return *(uint64_t *)ptr;
@@ -557,9 +639,13 @@ static inline void stfq_be_p(void *ptr, 
 
 /* target CPU memory access functions */
 #if defined(TARGET_WORDS_BIGENDIAN)
+/* native-endian */
 #define lduw_p(p) lduw_be_p(p)
 #define ldsw_p(p) ldsw_be_p(p)
-#define ldl_p(p) ldl_be_p(p)
+#define ldul_p(p) ldul_be_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_p(p) ldsl_be_p(p)
+#endif
 #define ldq_p(p) ldq_be_p(p)
 #define ldfl_p(p) ldfl_be_p(p)
 #define ldfq_p(p) ldfq_be_p(p)
@@ -568,10 +654,29 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_p(p, v) stq_be_p(p, v)
 #define stfl_p(p, v) stfl_be_p(p, v)
 #define stfq_p(p, v) stfq_be_p(p, v)
+/* reverse-endian */
+#define lduwr_p(p) lduw_le_p(p)
+#define ldswr_p(p) ldsw_le_p(p)
+#define ldulr_p(p) ldul_le_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_p(p) ldsl_le_p(p)
+#endif
+#define ldqr_p(p) ldq_le_p(p)
+#define ldflr_p(p) ldfl_le_p(p)
+#define ldfqr_p(p) ldfq_le_p(p)
+#define stwr_p(p, v) stw_le_p(p, v)
+#define stlr_p(p, v) stl_le_p(p, v)
+#define stqr_p(p, v) stq_le_p(p, v)
+#define stflr_p(p, v) stfl_le_p(p, v)
+#define stfqr_p(p, v) stfq_le_p(p, v)
 #else
+/* native-endian */
 #define lduw_p(p) lduw_le_p(p)
 #define ldsw_p(p) ldsw_le_p(p)
-#define ldl_p(p) ldl_le_p(p)
+#define ldul_p(p) ldul_le_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_p(p) ldsl_le_p(p)
+#endif
 #define ldq_p(p) ldq_le_p(p)
 #define ldfl_p(p) ldfl_le_p(p)
 #define ldfq_p(p) ldfq_le_p(p)
@@ -580,6 +685,21 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_p(p, v) stq_le_p(p, v)
 #define stfl_p(p, v) stfl_le_p(p, v)
 #define stfq_p(p, v) stfq_le_p(p, v)
+/* reverse-endian */
+#define lduwr_p(p) lduw_be_p(p)
+#define ldswr_p(p) ldsw_be_p(p)
+#define ldulr_p(p) ldul_be_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_p(p) ldsl_be_p(p)
+#endif
+#define ldqr_p(p) ldq_be_p(p)
+#define ldflr_p(p) ldfl_be_p(p)
+#define ldfqr_p(p) ldfq_be_p(p)
+#define stwr_p(p, v) stw_be_p(p, v)
+#define stlr_p(p, v) stl_be_p(p, v)
+#define stqr_p(p, v) stq_be_p(p, v)
+#define stflr_p(p, v) stfl_be_p(p, v)
+#define stfqr_p(p, v) stfq_be_p(p, v)
 #endif
 
 /* MMU memory access macros */
@@ -605,11 +725,15 @@ static inline void stfq_be_p(void *ptr, 
 #define laddr(x) (uint8_t *)(long)(x)
 #endif
 
+/* native-endian */
 #define ldub_raw(p) ldub_p(laddr((p)))
 #define ldsb_raw(p) ldsb_p(laddr((p)))
 #define lduw_raw(p) lduw_p(laddr((p)))
 #define ldsw_raw(p) ldsw_p(laddr((p)))
-#define ldl_raw(p) ldl_p(laddr((p)))
+#define ldul_raw(p) ldul_p(laddr((p)))
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_raw(p) ldsl_p(laddr((p)))
+#endif
 #define ldq_raw(p) ldq_p(laddr((p)))
 #define ldfl_raw(p) ldfl_p(laddr((p)))
 #define ldfq_raw(p) ldfq_p(laddr((p)))
@@ -619,16 +743,37 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_raw(p, v) stq_p(saddr((p)), v)
 #define stfl_raw(p, v) stfl_p(saddr((p)), v)
 #define stfq_raw(p, v) stfq_p(saddr((p)), v)
-
+/* reverse endian */
+#define ldubr_raw(p) ldub_p(laddr((p)))
+#define ldsbr_raw(p) ldsb_p(laddr((p)))
+#define lduwr_raw(p) lduwr_p(laddr((p)))
+#define ldswr_raw(p) ldswr_p(laddr((p)))
+#define ldulr_raw(p) ldulr_p(laddr((p)))
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_raw(p) ldslr_p(laddr((p)))
+#endif
+#define ldqr_raw(p) ldqr_p(laddr((p)))
+#define ldflr_raw(p) ldflr_p(laddr((p)))
+#define ldfqr_raw(p) ldfqr_p(laddr((p)))
+#define stbr_raw(p, v) stb_p(saddr((p)), v)
+#define stwr_raw(p, v) stwr_p(saddr((p)), v)
+#define stlr_raw(p, v) stlr_p(saddr((p)), v)
+#define stqr_raw(p, v) stqr_p(saddr((p)), v)
+#define stflr_raw(p, v) stflr_p(saddr((p)), v)
+#define stfqr_raw(p, v) stfqr_p(saddr((p)), v)
 
 #if defined(CONFIG_USER_ONLY)
 
 /* if user mode, no other memory access functions */
+/* native-endian */
 #define ldub(p) ldub_raw(p)
 #define ldsb(p) ldsb_raw(p)
 #define lduw(p) lduw_raw(p)
 #define ldsw(p) ldsw_raw(p)
-#define ldl(p) ldl_raw(p)
+#define ldul(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl(p) ldsl_raw(p)
+#endif
 #define ldq(p) ldq_raw(p)
 #define ldfl(p) ldfl_raw(p)
 #define ldfq(p) ldfq_raw(p)
@@ -638,19 +783,55 @@ static inline void stfq_be_p(void *ptr, 
 #define stq(p, v) stq_raw(p, v)
 #define stfl(p, v) stfl_raw(p, v)
 #define stfq(p, v) stfq_raw(p, v)
+/* reverse-endian */
+#define ldubr(p) ldub_raw(p)
+#define ldsbr(p) ldsb_raw(p)
+#define lduwr(p) lduwr_raw(p)
+#define ldswr(p) ldswr_raw(p)
+#define ldulr(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr(p) ldslr_raw(p)
+#endif
+#define ldqr(p) ldqr_raw(p)
+#define ldflr(p) ldflr_raw(p)
+#define ldfqr(p) ldfqr_raw(p)
+#define stbr(p, v) stb_raw(p, v)
+#define stwr(p, v) stwr_raw(p, v)
+#define stlr(p, v) stlr_raw(p, v)
+#define stqr(p, v) stqr_raw(p, v)
+#define stflr(p, v) stflr_raw(p, v)
+#define stfqr(p, v) stfqr_raw(p, v)
 
+/* native-endian */
 #define ldub_code(p) ldub_raw(p)
 #define ldsb_code(p) ldsb_raw(p)
 #define lduw_code(p) lduw_raw(p)
 #define ldsw_code(p) ldsw_raw(p)
-#define ldl_code(p) ldl_raw(p)
+#define ldul_code(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_code(p) ldsl_raw(p)
+#endif
 #define ldq_code(p) ldq_raw(p)
+/* reverse-endian */
+#define ldubr_code(p) ldub_raw(p)
+#define ldsbr_code(p) ldsb_raw(p)
+#define lduwr_code(p) lduwr_raw(p)
+#define ldswr_code(p) ldswr_raw(p)
+#define ldulr_code(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_code(p) ldslr_raw(p)
+#endif
+#define ldqr_code(p) ldqr_raw(p)
 
+/* native-endian */
 #define ldub_kernel(p) ldub_raw(p)
 #define ldsb_kernel(p) ldsb_raw(p)
 #define lduw_kernel(p) lduw_raw(p)
 #define ldsw_kernel(p) ldsw_raw(p)
-#define ldl_kernel(p) ldl_raw(p)
+#define ldul_kernel(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_kernel(p) ldsl_raw(p)
+#endif
 #define ldq_kernel(p) ldq_raw(p)
 #define ldfl_kernel(p) ldfl_raw(p)
 #define ldfq_kernel(p) ldfq_raw(p)
@@ -660,6 +841,24 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_kernel(p, v) stq_raw(p, v)
 #define stfl_kernel(p, v) stfl_raw(p, v)
 #define stfq_kernel(p, vt) stfq_raw(p, v)
+/* reverse-endian: in user mode these alias the raw accessors */
+#define ldubr_kernel(p) ldub_raw(p)
+#define ldsbr_kernel(p) ldsb_raw(p)
+#define lduwr_kernel(p) lduwr_raw(p)
+#define ldswr_kernel(p) ldswr_raw(p)
+#define ldulr_kernel(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_kernel(p) ldslr_raw(p)
+#endif
+#define ldqr_kernel(p) ldqr_raw(p)
+#define ldflr_kernel(p) ldflr_raw(p)
+#define ldfqr_kernel(p) ldfqr_raw(p)
+#define stbr_kernel(p, v) stbr_raw(p, v)
+#define stwr_kernel(p, v) stwr_raw(p, v)
+#define stlr_kernel(p, v) stlr_raw(p, v)
+#define stqr_kernel(p, v) stqr_raw(p, v)
+#define stflr_kernel(p, v) stflr_raw(p, v)
+#define stfqr_kernel(p, v) stfqr_raw(p, v) /* parameter was misspelled "vt" */
 
 #endif /* defined(CONFIG_USER_ONLY) */
 
@@ -790,6 +989,8 @@ extern uint8_t *phys_ram_dirty;
    the physical address */
 #define IO_MEM_ROMD        (1)
 #define IO_MEM_SUBPAGE     (2)
+/* On some target CPUs, endianness is stored in page tables. Tested as a bit flag, so it must not overlap ROMD|SUBPAGE; NOTE(review): assumes bit value 4 is unused and below IO_MEM_SHIFT - confirm */
+#define IO_MEM_REVERSE     (4)
 
 typedef void CPUWriteMemoryFunc(void *opaque, target_phys_addr_t addr, uint32_t value);
 typedef uint32_t CPUReadMemoryFunc(void *opaque, target_phys_addr_t addr);
@@ -821,7 +1022,7 @@ static inline void cpu_physical_memory_w
 }
 uint32_t ldub_phys(target_phys_addr_t addr);
 uint32_t lduw_phys(target_phys_addr_t addr);
-uint32_t ldl_phys(target_phys_addr_t addr);
+uint32_t ldul_phys(target_phys_addr_t addr);
 uint64_t ldq_phys(target_phys_addr_t addr);
 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val);
 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val);
Index: cpu-exec.c
===================================================================
RCS file: /sources/qemu/qemu/cpu-exec.c,v
retrieving revision 1.119
diff -u -d -d -p -r1.119 cpu-exec.c
--- cpu-exec.c	8 Oct 2007 13:16:13 -0000	1.119
+++ cpu-exec.c	13 Oct 2007 14:09:54 -0000
@@ -436,12 +436,12 @@ int cpu_exec(CPUState *env1)
                          /* FIXME: this should respect TPR */
                          env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
                          svm_check_intercept(SVM_EXIT_VINTR);
-                         intno = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector));
+                         intno = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector));
                          if (loglevel & CPU_LOG_TB_IN_ASM)
                              fprintf(logfile, "Servicing virtual hardware INT=0x%02x\n", intno);
 	                 do_interrupt(intno, 0, 0, -1, 1);
                          stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl),
-                                  ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)) & ~V_IRQ_MASK);
+                                  ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)) & ~V_IRQ_MASK);
 #if defined(__sparc__) && !defined(HOST_SOLARIS)
                          tmp_T0 = 0;
 #else
Index: exec-all.h
===================================================================
RCS file: /sources/qemu/qemu/exec-all.h,v
retrieving revision 1.67
diff -u -d -d -p -r1.67 exec-all.h
--- exec-all.h	8 Oct 2007 13:16:14 -0000	1.67
+++ exec-all.h	13 Oct 2007 14:09:54 -0000
@@ -569,6 +569,21 @@ void tlb_fill(target_ulong addr, int is_
 #define MEMSUFFIX _code
 #define env cpu_single_env
 
+/* native-endian */
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+
+/* reverse-endian */
+#define REVERSE_ENDIAN
 #define DATA_SIZE 1
 #include "softmmu_header.h"
 
@@ -580,6 +595,7 @@ void tlb_fill(target_ulong addr, int is_
 
 #define DATA_SIZE 8
 #include "softmmu_header.h"
+#undef REVERSE_ENDIAN
 
 #undef ACCESS_TYPE
 #undef MEMSUFFIX
Index: exec.c
===================================================================
RCS file: /sources/qemu/qemu/exec.c,v
retrieving revision 1.108
diff -u -d -d -p -r1.108 exec.c
--- exec.c	8 Oct 2007 13:16:14 -0000	1.108
+++ exec.c	13 Oct 2007 14:09:54 -0000
@@ -2202,7 +2202,7 @@ static uint32_t watch_mem_readw(void *op
 
 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
 {
-    return ldl_phys(addr);
+    return ldul_phys(addr);
 }
 
 /* Generate a debug exception if a watchpoint has been hit.
@@ -2507,7 +2507,7 @@ void cpu_physical_memory_rw(target_phys_
     uint8_t *ptr;
     uint32_t val;
     target_phys_addr_t page;
-    unsigned long pd;
+    unsigned long pd, addr1;
     PhysPageDesc *p;
 
     while (len > 0) {
@@ -2524,31 +2524,54 @@ void cpu_physical_memory_rw(target_phys_
 
         if (is_write) {
             if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
-                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
-                /* XXX: could force cpu_single_env to NULL to avoid
-                   potential bugs */
-                if (l >= 4 && ((addr & 3) == 0)) {
-                    /* 32 bit write access */
-                    val = ldl_p(buf);
-                    io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
-                    l = 4;
-                } else if (l >= 2 && ((addr & 1) == 0)) {
-                    /* 16 bit write access */
-                    val = lduw_p(buf);
-                    io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
-                    l = 2;
+                if (pd & IO_MEM_REVERSE) {
+                    /* Specific case for reverse endian page write */
+                    addr1 = (pd & TARGET_PAGE_MASK) +
+                        (addr & ~TARGET_PAGE_MASK);
+                    ptr = phys_ram_base + addr1;
+                    for (; l >= 4; l -= 4) {
+                        stlr_p(ptr, *(uint32_t *)buf);
+                        ptr += 4;
+                        buf += 4;
+                    }
+                    for (; l >= 2; l -= 2) {
+                        stwr_p(ptr, *(uint16_t *)buf);
+                        ptr += 2;
+                        buf += 2;
+                    }
+                    if (l >= 1)
+                        *ptr = *buf;
+                    goto invalidate_code;
                 } else {
-                    /* 8 bit write access */
-                    val = ldub_p(buf);
-                    io_mem_write[io_index][0](io_mem_opaque[io_index], addr, val);
-                    l = 1;
+                    io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+                    /* XXX: could force cpu_single_env to NULL to avoid
+                       potential bugs */
+                    if (l >= 4 && ((addr & 3) == 0)) {
+                        /* 32 bit write access */
+                        val = ldul_p(buf);
+                        io_mem_write[io_index][2](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 4;
+                    } else if (l >= 2 && ((addr & 1) == 0)) {
+                        /* 16 bit write access */
+                        val = lduw_p(buf);
+                        io_mem_write[io_index][1](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 2;
+                    } else {
+                        /* 8 bit write access */
+                        val = ldub_p(buf);
+                        io_mem_write[io_index][0](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 1;
+                    }
                 }
             } else {
-                unsigned long addr1;
                 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
                 /* RAM case */
                 ptr = phys_ram_base + addr1;
                 memcpy(ptr, buf, l);
+            invalidate_code:
                 if (!cpu_physical_memory_is_dirty(addr1)) {
                     /* invalidate code */
                     tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
@@ -2560,23 +2583,45 @@ void cpu_physical_memory_rw(target_phys_
         } else {
             if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
                 !(pd & IO_MEM_ROMD)) {
-                /* I/O case */
-                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
-                if (l >= 4 && ((addr & 3) == 0)) {
-                    /* 32 bit read access */
-                    val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
-                    stl_p(buf, val);
-                    l = 4;
-                } else if (l >= 2 && ((addr & 1) == 0)) {
-                    /* 16 bit read access */
-                    val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
-                    stw_p(buf, val);
-                    l = 2;
+                if (pd & IO_MEM_REVERSE) {
+                    /* Specific case for reverse endian page read */
+                    addr1 = (pd & TARGET_PAGE_MASK) +
+                        (addr & ~TARGET_PAGE_MASK);
+                    ptr = phys_ram_base + addr1;
+                    for (; l >= 4; l -= 4) {
+                        *(uint32_t *)buf = ldulr_p(ptr);
+                        ptr += 4;
+                        buf += 4;
+                    }
+                    for (; l >= 2; l -= 2) {
+                        *(uint16_t *)buf = lduwr_p(ptr);
+                        ptr += 2;
+                        buf += 2;
+                    }
+                    if (l >= 1)
+                        *buf = *ptr;
                 } else {
-                    /* 8 bit read access */
-                    val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr);
-                    stb_p(buf, val);
-                    l = 1;
+                    /* I/O case */
+                    io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+                    if (l >= 4 && ((addr & 3) == 0)) {
+                        /* 32 bit read access */
+                        val = io_mem_read[io_index][2](io_mem_opaque[io_index],
+                                                       addr);
+                        stl_p(buf, val);
+                        l = 4;
+                    } else if (l >= 2 && ((addr & 1) == 0)) {
+                        /* 16 bit read access */
+                        val = io_mem_read[io_index][1](io_mem_opaque[io_index],
+                                                       addr);
+                        stw_p(buf, val);
+                        l = 2;
+                    } else {
+                        /* 8 bit read access */
+                        val = io_mem_read[io_index][0](io_mem_opaque[io_index],
+                                                       addr);
+                        stb_p(buf, val);
+                        l = 1;
+                    }
                 }
             } else {
                 /* RAM case */
@@ -2632,7 +2677,7 @@ void cpu_physical_memory_write_rom(targe
 
 
 /* warning: addr must be aligned */
-uint32_t ldl_phys(target_phys_addr_t addr)
+uint32_t ldul_phys(target_phys_addr_t addr)
 {
     int io_index;
     uint8_t *ptr;
@@ -2656,7 +2701,7 @@ uint32_t ldl_phys(target_phys_addr_t add
         /* RAM case */
         ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) +
             (addr & ~TARGET_PAGE_MASK);
-        val = ldl_p(ptr);
+        val = ldul_p(ptr);
     }
     return val;
 }
@@ -2907,6 +2952,7 @@ void dump_exec_info(FILE *f,
 #define env cpu_single_env
 #define SOFTMMU_CODE_ACCESS
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -2919,6 +2965,21 @@ void dump_exec_info(FILE *f,
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 #undef env
 
 #endif
Index: monitor.c
===================================================================
RCS file: /sources/qemu/qemu/monitor.c,v
retrieving revision 1.83
diff -u -d -d -p -r1.83 monitor.c
--- monitor.c	25 Sep 2007 17:28:42 -0000	1.83
+++ monitor.c	13 Oct 2007 14:09:54 -0000
@@ -595,7 +595,7 @@ static void memory_dump(int count, int f
                 v = lduw_raw(buf + i);
                 break;
             case 4:
-                v = (uint32_t)ldl_raw(buf + i);
+                v = (uint32_t)ldul_raw(buf + i);
                 break;
             case 8:
                 v = ldq_raw(buf + i);
Index: softmmu_exec.h
===================================================================
RCS file: /sources/qemu/qemu/softmmu_exec.h,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 softmmu_exec.h
--- softmmu_exec.h	30 Oct 2005 18:16:26 -0000	1.1
+++ softmmu_exec.h	13 Oct 2007 14:09:54 -0000
@@ -1,7 +1,7 @@
 /* Common softmmu definitions and inline routines.  */
 
-#define ldul_user ldl_user
-#define ldul_kernel ldl_kernel
+#define lduq_user ldq_user
+#define lduq_kernel ldq_kernel
 
 #define ACCESS_TYPE 0
 #define MEMSUFFIX _kernel
@@ -56,7 +56,10 @@
 #define ldsb(p) ldsb_data(p)
 #define lduw(p) lduw_data(p)
 #define ldsw(p) ldsw_data(p)
-#define ldl(p) ldl_data(p)
+#define ldul(p) ldul_data(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl(p) ldsl_data(p)
+#endif
 #define ldq(p) ldq_data(p)
 
 #define stb(p, v) stb_data(p, v)
Index: softmmu_header.h
===================================================================
RCS file: /sources/qemu/qemu/softmmu_header.h,v
retrieving revision 1.17
diff -u -d -d -p -r1.17 softmmu_header.h
--- softmmu_header.h	8 Oct 2007 13:16:14 -0000	1.17
+++ softmmu_header.h	13 Oct 2007 14:09:54 -0000
@@ -17,13 +17,19 @@
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
+
+#if !defined(REVERSE_ENDIAN)
+/* native-endian */
 #if DATA_SIZE == 8
 #define SUFFIX q
 #define USUFFIX q
 #define DATA_TYPE uint64_t
 #elif DATA_SIZE == 4
 #define SUFFIX l
-#define USUFFIX l
+#define USUFFIX ul
+#if (TARGET_LONG_BITS == 64)
+#define DATA_STYPE int32_t
+#endif
 #define DATA_TYPE uint32_t
 #elif DATA_SIZE == 2
 #define SUFFIX w
@@ -38,6 +44,33 @@
 #else
 #error unsupported data size
 #endif
+#else /* !defined(REVERSE_ENDIAN) */
+/* reverse-endian */
+#if DATA_SIZE == 8
+#define SUFFIX qr
+#define USUFFIX qr
+#define DATA_TYPE uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX lr
+#define USUFFIX ulr
+#if (TARGET_LONG_BITS == 64)
+#define DATA_STYPE int32_t
+#endif
+#define DATA_TYPE uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX wr
+#define USUFFIX uwr
+#define DATA_TYPE uint16_t
+#define DATA_STYPE int16_t
+#elif DATA_SIZE == 1
+#define SUFFIX br
+#define USUFFIX ubr
+#define DATA_TYPE uint8_t
+#define DATA_STYPE int8_t
+#else
+#error unsupported data size
+#endif
+#endif /* defined(REVERSE_ENDIAN) */
 
 #if ACCESS_TYPE == 0
 
@@ -168,7 +201,7 @@ static inline RES_TYPE glue(glue(ld, USU
     return res;
 }
 
-#if DATA_SIZE <= 2
+#if defined(DATA_STYPE)
 static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
 {
     int res;
@@ -291,7 +324,7 @@ static inline RES_TYPE glue(glue(ld, USU
     return res;
 }
 
-#if DATA_SIZE <= 2
+#if defined(DATA_STYPE)
 static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
 {
     int res, index;
@@ -342,6 +375,7 @@ static inline void glue(glue(st, SUFFIX)
 
 #if ACCESS_TYPE != 3
 
+#if !defined(REVERSE_ENDIAN)
 #if DATA_SIZE == 8
 static inline float64 glue(ldfq, MEMSUFFIX)(target_ulong ptr)
 {
@@ -371,7 +405,7 @@ static inline float32 glue(ldfl, MEMSUFF
         float32 f;
         uint32_t i;
     } u;
-    u.i = glue(ldl, MEMSUFFIX)(ptr);
+    u.i = glue(ldul, MEMSUFFIX)(ptr);
     return u.f;
 }
 
@@ -386,6 +420,54 @@ static inline void glue(stfl, MEMSUFFIX)
 }
 #endif /* DATA_SIZE == 4 */
 
+#else /* defined(REVERSE_ENDIAN) */
+
+#if DATA_SIZE == 8
+static inline float64 glue(ldfqr, MEMSUFFIX)(target_ulong ptr)
+{
+    union {
+        float64 d;
+        uint64_t i;
+    } u;
+    u.i = glue(ldqr, MEMSUFFIX)(ptr);
+    return u.d;
+}
+
+static inline void glue(stfqr, MEMSUFFIX)(target_ulong ptr, float64 v)
+{
+    union {
+        float64 d;
+        uint64_t i;
+    } u;
+    u.d = v;
+    glue(stqr, MEMSUFFIX)(ptr, u.i);
+}
+#endif /* DATA_SIZE == 8 */
+
+#if DATA_SIZE == 4
+static inline float32 glue(ldflr, MEMSUFFIX)(target_ulong ptr)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.i = glue(ldulr, MEMSUFFIX)(ptr);
+    return u.f;
+}
+
+static inline void glue(stflr, MEMSUFFIX)(target_ulong ptr, float32 v)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.f = v;
+    glue(stlr, MEMSUFFIX)(ptr, u.i);
+}
+#endif /* DATA_SIZE == 4 */
+
+#endif /* defined(REVERSE_ENDIAN) */
+
 #endif /* ACCESS_TYPE != 3 */
 
 #undef RES_TYPE
Index: softmmu_template.h
===================================================================
RCS file: /sources/qemu/qemu/softmmu_template.h,v
retrieving revision 1.18
diff -u -d -d -p -r1.18 softmmu_template.h
--- softmmu_template.h	17 Sep 2007 08:09:45 -0000	1.18
+++ softmmu_template.h	13 Oct 2007 14:09:54 -0000
@@ -19,25 +19,66 @@
  */
 #define DATA_SIZE (1 << SHIFT)
 
+#if !defined(REVERSE_ENDIAN)
+/* native-endian */
 #if DATA_SIZE == 8
 #define SUFFIX q
 #define USUFFIX q
+#define RSUFFIX qr
+#define URSUFFIX qr
 #define DATA_TYPE uint64_t
 #elif DATA_SIZE == 4
 #define SUFFIX l
-#define USUFFIX l
+#define USUFFIX ul
+#define RSUFFIX lr
+#define URSUFFIX ulr
 #define DATA_TYPE uint32_t
 #elif DATA_SIZE == 2
 #define SUFFIX w
 #define USUFFIX uw
+#define RSUFFIX wr
+#define URSUFFIX uwr
 #define DATA_TYPE uint16_t
 #elif DATA_SIZE == 1
 #define SUFFIX b
 #define USUFFIX ub
+#define RSUFFIX br
+#define URSUFFIX ubr
 #define DATA_TYPE uint8_t
 #else
 #error unsupported data size
 #endif
+#else /* !defined(REVERSE_ENDIAN) */
+/* reverse-endian */
+#if DATA_SIZE == 8
+#define SUFFIX qr
+#define USUFFIX qr
+#define RSUFFIX q
+#define URSUFFIX q
+#define DATA_TYPE uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX lr
+#define USUFFIX ulr
+#define RSUFFIX l
+#define URSUFFIX ul
+#define DATA_TYPE uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX wr
+#define USUFFIX uwr
+#define RSUFFIX w
+#define URSUFFIX uw
+#define DATA_TYPE uint16_t
+#elif DATA_SIZE == 1
+#define SUFFIX br
+#define USUFFIX ubr
+#define RSUFFIX b
+#define URSUFFIX ub
+#define DATA_TYPE uint8_t
+#else
+#error unsupported data size
+#endif
+#endif /* defined(REVERSE_ENDIAN) */
+
 
 #ifdef SOFTMMU_CODE_ACCESS
 #define READ_ACCESS_TYPE 2
@@ -47,25 +88,62 @@
 #define ADDR_READ addr_read
 #endif
 
+#if (defined(TARGET_WORDS_BIGENDIAN) && !defined(REVERSE_ENDIAN)) || \
+    (!defined(TARGET_WORDS_BIGENDIAN) && defined(REVERSE_ENDIAN))
+#define ACCESS_WORDS_BIGENDIAN
+#endif
+
+/* Beware: we do not have reverse-endian accessors for IOs */
+#if defined(REVERSE_ENDIAN)
+#define DO_IOSWAP 1
+#else
+#define DO_IOSWAP 0
+#endif
+#if SHIFT == 1
+#define IOSWAP(val) bswap16(val)
+#elif SHIFT >= 2
+#define IOSWAP(val) bswap32(val)
+#else
+#define IOSWAP(val) (val)
+#endif
+
 static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
                                                         int is_user,
                                                         void *retaddr);
 static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
-                                              target_ulong tlb_addr)
+                                              target_ulong tlb_addr,
+                                              int do_ioswap)
 {
     DATA_TYPE res;
+#if SHIFT > 2
+    uint32_t tmp;
+#endif
     int index;
 
     index = (tlb_addr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
 #if SHIFT <= 2
     res = io_mem_read[index][SHIFT](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        res = IOSWAP(res);
 #else
-#ifdef TARGET_WORDS_BIGENDIAN
-    res = (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr) << 32;
-    res |= io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+#ifdef ACCESS_WORDS_BIGENDIAN
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res = (uint64_t)tmp << 32;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res |= tmp;
 #else
-    res = io_mem_read[index][2](io_mem_opaque[index], physaddr);
-    res |= (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr + 4) << 32;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res = tmp;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res |= (uint64_t)tmp << 32;
 #endif
 #endif /* SHIFT > 2 */
 #ifdef USE_KQEMU
@@ -92,10 +170,34 @@ DATA_TYPE REGPARM(1) glue(glue(__ld, SUF
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[is_user][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO read */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 1);
+                } else {
+                    /* Specific case for reverse endian page read */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages or IO) */
+                        goto do_unaligned_access;
+                    }
+#ifdef ALIGNED_ONLY
+                    if ((addr & (DATA_SIZE - 1)) != 0) {
+                        retaddr = GETPC();
+                        do_unaligned_access(addr, READ_ACCESS_TYPE,
+                                            is_user, retaddr);
+                    }
+#endif
+                    res = glue(glue(ld, URSUFFIX), _raw)((uint8_t *)(long)physaddr);
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
             /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
@@ -144,10 +246,45 @@ static DATA_TYPE glue(glue(slow_ld, SUFF
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[is_user][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO read */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 1);
+                } else {
+                    /* Specific case for reverse endian page read */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages) */
+                        addr1 = addr & ~(DATA_SIZE - 1);
+                        addr2 = addr1 + DATA_SIZE;
+                        res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr1,
+                                                                      is_user,
+                                                                      retaddr);
+                        res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
+                                                                      is_user,
+                                                                      retaddr);
+                        shift = (addr & (DATA_SIZE - 1)) * 8;
+#ifdef ACCESS_WORDS_BIGENDIAN
+                        res = (res1 >> shift) |
+                            (res2 << ((DATA_SIZE * 8) - shift));
+#else
+                        res = (res1 << shift) |
+                            (res2 >> ((DATA_SIZE * 8) - shift));
+#endif
+                        res = (DATA_TYPE)res;
+                    } else {
+                        /* unaligned/aligned access in the same page */
+                        res = glue(glue(ld, URSUFFIX), _raw)((uint8_t *)(long)physaddr);
+                    }
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* slow unaligned access (it spans two pages) */
@@ -158,7 +295,7 @@ static DATA_TYPE glue(glue(slow_ld, SUFF
             res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
                                                           is_user, retaddr);
             shift = (addr & (DATA_SIZE - 1)) * 8;
-#ifdef TARGET_WORDS_BIGENDIAN
+#ifdef ACCESS_WORDS_BIGENDIAN
             res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
 #else
             res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift));
@@ -186,22 +323,39 @@ static void glue(glue(slow_st, SUFFIX), 
 static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr,
                                           DATA_TYPE val,
                                           target_ulong tlb_addr,
-                                          void *retaddr)
+                                          void *retaddr, int do_ioswap)
 {
+#if SHIFT > 2
+    uint32_t tmp;
+#endif
     int index;
 
     index = (tlb_addr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
     env->mem_write_vaddr = tlb_addr;
     env->mem_write_pc = (unsigned long)retaddr;
 #if SHIFT <= 2
+    if (do_ioswap != DO_IOSWAP)
+        val = IOSWAP(val);
     io_mem_write[index][SHIFT](io_mem_opaque[index], physaddr, val);
 #else
-#ifdef TARGET_WORDS_BIGENDIAN
-    io_mem_write[index][2](io_mem_opaque[index], physaddr, val >> 32);
-    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val);
+#ifdef ACCESS_WORDS_BIGENDIAN
+    tmp = val >> 32;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr, tmp);
+    tmp = val;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, tmp);
 #else
-    io_mem_write[index][2](io_mem_opaque[index], physaddr, val);
-    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val >> 32);
+    tmp = val;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr, tmp);
+    tmp = val >> 32;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, tmp);
 #endif
 #endif /* SHIFT > 2 */
 #ifdef USE_KQEMU
@@ -224,12 +378,37 @@ void REGPARM(2) glue(glue(__st, SUFFIX),
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[is_user][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            retaddr = GETPC();
-            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO write */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    retaddr = GETPC();
+                    glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr,
+                                           1);
+                } else {
+                    /* Specific case for reverse endian page write */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        goto do_unaligned_access;
+                    }
+#ifdef ALIGNED_ONLY
+                    if ((addr & (DATA_SIZE - 1)) != 0) {
+                        retaddr = GETPC();
+                        do_unaligned_access(addr, 1, is_user, retaddr);
+                    }
+#endif
+                    glue(glue(st, RSUFFIX), _raw)((uint8_t *)(long)physaddr, val);
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                retaddr = GETPC();
+                glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
+            /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
@@ -275,15 +454,48 @@ static void glue(glue(slow_st, SUFFIX), 
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[is_user][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO write */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr,
+                                           1);
+                } else {
+                    /* Specific case for reverse endian page write */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages) */
+                        /* XXX: not efficient, but simple */
+                        for(i = 0;i < DATA_SIZE; i++) {
+#ifdef ACCESS_WORDS_BIGENDIAN
+                            glue(slow_stb, MMUSUFFIX)(addr + i, val >> (i * 8),
+                                                      is_user, retaddr);
+#else
+                            glue(slow_stb, MMUSUFFIX)(addr + i,
+                                                      val >> (((DATA_SIZE - 1) * 8)
+                                                              - (i * 8)),
+                                                      is_user, retaddr);
+#endif
+                        }
+
+                    } else {
+                        /* aligned/unaligned access in the same page */
+                        glue(glue(st, RSUFFIX), _raw)((uint8_t *)(long)physaddr,
+                                                      val);
+                    }
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* XXX: not efficient, but simple */
             for(i = 0;i < DATA_SIZE; i++) {
-#ifdef TARGET_WORDS_BIGENDIAN
+#ifdef ACCESS_WORDS_BIGENDIAN
                 glue(slow_stb, MMUSUFFIX)(addr + i, val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
                                           is_user, retaddr);
 #else
@@ -304,10 +516,15 @@ static void glue(glue(slow_st, SUFFIX), 
 
 #endif /* !defined(SOFTMMU_CODE_ACCESS) */
 
+#undef DO_IOSWAP
+#undef IOSWAP
+#undef ACCESS_WORDS_BIGENDIAN
 #undef READ_ACCESS_TYPE
 #undef SHIFT
 #undef DATA_TYPE
 #undef SUFFIX
 #undef USUFFIX
+#undef RSUFFIX
+#undef URSUFFIX
 #undef DATA_SIZE
 #undef ADDR_READ
Index: hw/eepro100.c
===================================================================
RCS file: /sources/qemu/qemu/hw/eepro100.c,v
retrieving revision 1.6
diff -u -d -d -p -r1.6 eepro100.c
--- hw/eepro100.c	16 Sep 2007 21:07:52 -0000	1.6
+++ hw/eepro100.c	13 Oct 2007 14:09:55 -0000
@@ -723,7 +723,7 @@ static void eepro100_cu_command(EEPRO100
             uint32_t tbd_address = cb_address + 0x10;
             assert(tcb_bytes <= sizeof(buf));
             while (size < tcb_bytes) {
-                uint32_t tx_buffer_address = ldl_phys(tbd_address);
+                uint32_t tx_buffer_address = ldul_phys(tbd_address);
                 uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
                 //~ uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
                 tbd_address += 8;
@@ -743,7 +743,7 @@ static void eepro100_cu_command(EEPRO100
                     /* Extended TCB. */
                     assert(tcb_bytes == 0);
                     for (; tbd_count < 2; tbd_count++) {
-                        uint32_t tx_buffer_address = ldl_phys(tbd_address);
+                        uint32_t tx_buffer_address = ldul_phys(tbd_address);
                         uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
                         uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
                         tbd_address += 8;
@@ -760,7 +760,7 @@ static void eepro100_cu_command(EEPRO100
                 }
                 tbd_address = tbd_array;
                 for (; tbd_count < tx.tbd_count; tbd_count++) {
-                    uint32_t tx_buffer_address = ldl_phys(tbd_address);
+                    uint32_t tx_buffer_address = ldul_phys(tbd_address);
                     uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
                     uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
                     tbd_address += 8;
Index: hw/pc.c
===================================================================
RCS file: /sources/qemu/qemu/hw/pc.c,v
retrieving revision 1.87
diff -u -d -d -p -r1.87 pc.c
--- hw/pc.c	9 Oct 2007 03:08:56 -0000	1.87
+++ hw/pc.c	13 Oct 2007 14:09:55 -0000
@@ -477,8 +477,8 @@ static void load_linux(const char *kerne
     }
 
     /* kernel protocol version */
-    fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202));
-    if (ldl_p(header+0x202) == 0x53726448)
+    fprintf(stderr, "header magic: %#x\n", ldul_p(header+0x202));
+    if (ldul_p(header+0x202) == 0x53726448)
 	protocol = lduw_p(header+0x206);
     else
 	protocol = 0;
@@ -510,7 +510,7 @@ static void load_linux(const char *kerne
 
     /* highest address for loading the initrd */
     if (protocol >= 0x203)
-	initrd_max = ldl_p(header+0x22c);
+	initrd_max = ldul_p(header+0x22c);
     else
 	initrd_max = 0x37ffffff;
 
Index: hw/pl080.c
===================================================================
RCS file: /sources/qemu/qemu/hw/pl080.c,v
retrieving revision 1.5
diff -u -d -d -p -r1.5 pl080.c
--- hw/pl080.c	16 Sep 2007 21:07:55 -0000	1.5
+++ hw/pl080.c	13 Oct 2007 14:09:55 -0000
@@ -162,10 +162,10 @@ again:
             if (size == 0) {
                 /* Transfer complete.  */
                 if (ch->lli) {
-                    ch->src = ldl_phys(ch->lli);
-                    ch->dest = ldl_phys(ch->lli + 4);
-                    ch->ctrl = ldl_phys(ch->lli + 12);
-                    ch->lli = ldl_phys(ch->lli + 8);
+                    ch->src = ldul_phys(ch->lli);
+                    ch->dest = ldul_phys(ch->lli + 4);
+                    ch->ctrl = ldul_phys(ch->lli + 12);
+                    ch->lli = ldul_phys(ch->lli + 8);
                 } else {
                     ch->conf &= ~PL080_CCONF_E;
                 }
Index: hw/sun4m.c
===================================================================
RCS file: /sources/qemu/qemu/hw/sun4m.c,v
retrieving revision 1.55
diff -u -d -d -p -r1.55 sun4m.c
--- hw/sun4m.c	6 Oct 2007 11:28:21 -0000	1.55
+++ hw/sun4m.c	13 Oct 2007 14:09:55 -0000
@@ -465,7 +465,7 @@ static void sun4m_load_kernel(long vram_
         }
         if (initrd_size > 0) {
             for (i = 0; i < 64 * TARGET_PAGE_SIZE; i += TARGET_PAGE_SIZE) {
-                if (ldl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
+                if (ldul_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
                     == 0x48647253) { // HdrS
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 16, INITRD_LOAD_ADDR);
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 20, initrd_size);
Index: hw/sun4u.c
===================================================================
RCS file: /sources/qemu/qemu/hw/sun4u.c,v
retrieving revision 1.22
diff -u -d -d -p -r1.22 sun4u.c
--- hw/sun4u.c	6 Oct 2007 11:28:21 -0000	1.22
+++ hw/sun4u.c	13 Oct 2007 14:09:55 -0000
@@ -418,7 +418,7 @@ static void sun4u_init(int ram_size, int
         }
         if (initrd_size > 0) {
             for (i = 0; i < 64 * TARGET_PAGE_SIZE; i += TARGET_PAGE_SIZE) {
-                if (ldl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
+                if (ldul_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
                     == 0x48647253) { // HdrS
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 16, INITRD_LOAD_ADDR);
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 20, initrd_size);
Index: linux-user/elfload.c
===================================================================
RCS file: /sources/qemu/qemu/linux-user/elfload.c,v
retrieving revision 1.51
diff -u -d -d -p -r1.51 elfload.c
--- linux-user/elfload.c	9 Oct 2007 16:34:29 -0000	1.51
+++ linux-user/elfload.c	13 Oct 2007 14:09:55 -0000
@@ -336,7 +336,7 @@ static inline void init_thread(struct ta
     pos += sizeof(target_ulong);
     _regs->gpr[4] = pos;
     for (tmp = 1; tmp != 0; pos += sizeof(target_ulong))
-        tmp = ldl(pos);
+        tmp = ldul(pos);
     _regs->gpr[5] = pos;
 }
 
Index: linux-user/qemu.h
===================================================================
RCS file: /sources/qemu/qemu/linux-user/qemu.h,v
retrieving revision 1.40
diff -u -d -d -p -r1.40 qemu.h
--- linux-user/qemu.h	9 Oct 2007 16:34:29 -0000	1.40
+++ linux-user/qemu.h	13 Oct 2007 14:09:56 -0000
@@ -313,7 +313,7 @@ static inline void *lock_user_string(tar
 #define tput8(addr, val) stb(addr, val)
 #define tget16(addr) lduw(addr)
 #define tput16(addr, val) stw(addr, val)
-#define tget32(addr) ldl(addr)
+#define tget32(addr) ldul(addr)
 #define tput32(addr, val) stl(addr, val)
 #define tget64(addr) ldq(addr)
 #define tput64(addr, val) stq(addr, val)
@@ -321,7 +321,7 @@ static inline void *lock_user_string(tar
 #define tgetl(addr) ldq(addr)
 #define tputl(addr, val) stq(addr, val)
 #else
-#define tgetl(addr) ldl(addr)
+#define tgetl(addr) ldul(addr)
 #define tputl(addr, val) stl(addr, val)
 #endif
 
Index: linux-user/signal.c
===================================================================
RCS file: /sources/qemu/qemu/linux-user/signal.c,v
retrieving revision 1.45
diff -u -d -d -p -r1.45 signal.c
--- linux-user/signal.c	5 Oct 2007 17:01:51 -0000	1.45
+++ linux-user/signal.c	13 Oct 2007 14:09:56 -0000
@@ -878,28 +878,28 @@ restore_sigcontext(CPUX86State *env, str
         cpu_x86_load_seg(env, R_ES, lduw(&sc->es));
         cpu_x86_load_seg(env, R_DS, lduw(&sc->ds));
 
-        env->regs[R_EDI] = ldl(&sc->edi);
-        env->regs[R_ESI] = ldl(&sc->esi);
-        env->regs[R_EBP] = ldl(&sc->ebp);
-        env->regs[R_ESP] = ldl(&sc->esp);
-        env->regs[R_EBX] = ldl(&sc->ebx);
-        env->regs[R_EDX] = ldl(&sc->edx);
-        env->regs[R_ECX] = ldl(&sc->ecx);
-        env->eip = ldl(&sc->eip);
+        env->regs[R_EDI] = ldul(&sc->edi);
+        env->regs[R_ESI] = ldul(&sc->esi);
+        env->regs[R_EBP] = ldul(&sc->ebp);
+        env->regs[R_ESP] = ldul(&sc->esp);
+        env->regs[R_EBX] = ldul(&sc->ebx);
+        env->regs[R_EDX] = ldul(&sc->edx);
+        env->regs[R_ECX] = ldul(&sc->ecx);
+        env->eip = ldul(&sc->eip);
 
         cpu_x86_load_seg(env, R_CS, lduw(&sc->cs) | 3);
         cpu_x86_load_seg(env, R_SS, lduw(&sc->ss) | 3);
 
 	{
 		unsigned int tmpflags;
-                tmpflags = ldl(&sc->eflags);
+                tmpflags = ldul(&sc->eflags);
 		env->eflags = (env->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
                 //		regs->orig_eax = -1;		/* disable syscall checks */
 	}
 
 	{
 		struct _fpstate * buf;
-                buf = (void *)ldl(&sc->fpstate);
+                buf = (void *)ldul(&sc->fpstate);
 		if (buf) {
 #if 0
 			if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
@@ -909,7 +909,7 @@ restore_sigcontext(CPUX86State *env, str
 		}
 	}
 
-        *peax = ldl(&sc->eax);
+        *peax = ldul(&sc->eax);
 	return err;
 #if 0
 badframe:
Index: linux-user/vm86.c
===================================================================
RCS file: /sources/qemu/qemu/linux-user/vm86.c,v
retrieving revision 1.11
diff -u -d -d -p -r1.11 vm86.c
--- linux-user/vm86.c	17 Sep 2007 08:09:50 -0000	1.11
+++ linux-user/vm86.c	13 Oct 2007 14:09:56 -0000
@@ -56,7 +56,7 @@ static inline unsigned int vm_getw(uint8
 
 static inline unsigned int vm_getl(uint8_t *segptr, unsigned int reg16)
 {
-    return ldl(segptr + (reg16 & 0xffff));
+    return ldul(segptr + (reg16 & 0xffff));
 }
 
 void save_v86_state(CPUX86State *env)
Index: target-alpha/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/exec.h,v
retrieving revision 1.3
diff -u -d -d -p -r1.3 exec.h
--- target-alpha/exec.h	16 Sep 2007 21:08:01 -0000	1.3
+++ target-alpha/exec.h	13 Oct 2007 14:09:56 -0000
@@ -62,6 +62,9 @@ register uint64_t T2 asm(AREG3);
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 static inline void env_to_regs(void)
Index: target-alpha/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/helper.c,v
retrieving revision 1.4
diff -u -d -d -p -r1.4 helper.c
--- target-alpha/helper.c	17 Sep 2007 08:09:51 -0000	1.4
+++ target-alpha/helper.c	13 Oct 2007 14:09:56 -0000
@@ -69,7 +69,7 @@ int cpu_alpha_handle_mmu_fault (CPUState
             env->exception_index = EXCP_DTB_MISS_PAL;
         else
             env->exception_index = EXCP_DTB_MISS_NATIVE;
-        opc = (ldl_code(env->pc) >> 21) << 4;
+        opc = (ldul_code(env->pc) >> 21) << 4;
         if (rw) {
             opc |= 0x9;
         } else {
Index: target-alpha/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/op_helper.c,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 op_helper.c
--- target-alpha/op_helper.c	16 Sep 2007 21:08:01 -0000	1.2
+++ target-alpha/op_helper.c	13 Oct 2007 14:09:56 -0000
@@ -1207,6 +1207,21 @@ void helper_st_phys_to_virt (void)
 
 #define MMUSUFFIX _mmu
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -1218,6 +1233,7 @@ void helper_st_phys_to_virt (void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-alpha/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/op_mem.h,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 op_mem.h
--- target-alpha/op_mem.h	16 Sep 2007 21:08:01 -0000	1.2
+++ target-alpha/op_mem.h	13 Oct 2007 14:09:56 -0000
@@ -30,7 +30,7 @@ static inline uint32_t glue(ldl_l, MEMSU
 {
     env->lock = EA;
 
-    return glue(ldl, MEMSUFFIX)(EA);
+    return glue(ldul, MEMSUFFIX)(EA);
 }
 
 static inline uint32_t glue(ldq_l, MEMSUFFIX) (target_ulong EA)
@@ -82,7 +82,7 @@ ALPHA_LD_OP(bu, ldub);
 ALPHA_ST_OP(b, stb);
 ALPHA_LD_OP(wu, lduw);
 ALPHA_ST_OP(w, stw);
-ALPHA_LD_OP(l, ldl);
+ALPHA_LD_OP(l, ldul);
 ALPHA_ST_OP(l, stl);
 ALPHA_LD_OP(q, ldq);
 ALPHA_ST_OP(q, stq);
Index: target-alpha/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/translate.c,v
retrieving revision 1.5
diff -u -d -d -p -r1.5 translate.c
--- target-alpha/translate.c	16 Sep 2007 21:08:01 -0000	1.5
+++ target-alpha/translate.c	13 Oct 2007 14:09:56 -0000
@@ -2010,7 +2010,7 @@ int gen_intermediate_code_internal (CPUS
                     ctx.pc, ctx.mem_idx);
         }
 #endif
-        insn = ldl_code(ctx.pc);
+        insn = ldul_code(ctx.pc);
 #if defined ALPHA_DEBUG_DISAS
         insn_count++;
         if (logfile != NULL) {
Index: target-arm/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-arm/exec.h,v
retrieving revision 1.13
diff -u -d -d -p -r1.13 exec.h
--- target-arm/exec.h	16 Sep 2007 21:08:01 -0000	1.13
+++ target-arm/exec.h	13 Oct 2007 14:09:56 -0000
@@ -64,6 +64,9 @@ static inline int cpu_halted(CPUState *e
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 /* In op_helper.c */
Index: target-arm/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-arm/helper.c,v
retrieving revision 1.22
diff -u -d -d -p -r1.22 helper.c
--- target-arm/helper.c	12 Oct 2007 06:47:46 -0000	1.22
+++ target-arm/helper.c	13 Oct 2007 14:09:56 -0000
@@ -297,7 +297,7 @@ void do_interrupt(CPUARMState *env)
             if (env->thumb) {
                 mask = lduw_code(env->regs[15] - 2) & 0xff;
             } else {
-                mask = ldl_code(env->regs[15] - 4) & 0xffffff;
+                mask = ldul_code(env->regs[15] - 4) & 0xffffff;
             }
             /* Only intercept calls from privileged modes, to provide some
                semblance of security.  */
@@ -473,7 +473,7 @@ static int get_phys_addr(CPUState *env, 
         /* Pagetable walk.  */
         /* Lookup l1 descriptor.  */
         table = (env->cp15.c2_base & 0xffffc000) | ((address >> 18) & 0x3ffc);
-        desc = ldl_phys(table);
+        desc = ldul_phys(table);
         type = (desc & 3);
         domain = (env->cp15.c3 >> ((desc >> 4) & 0x1e)) & 3;
         if (type == 0) {
@@ -502,7 +502,7 @@ static int get_phys_addr(CPUState *env, 
                 /* Fine pagetable.  */
                 table = (desc & 0xfffff000) | ((address >> 8) & 0xffc);
             }
-            desc = ldl_phys(table);
+            desc = ldul_phys(table);
             switch (desc & 3) {
             case 0: /* Page translation fault.  */
                 code = 7;
Index: target-arm/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-arm/op_helper.c,v
retrieving revision 1.6
diff -u -d -d -p -r1.6 op_helper.c
--- target-arm/op_helper.c	16 Sep 2007 21:08:02 -0000	1.6
+++ target-arm/op_helper.c	13 Oct 2007 14:09:56 -0000
@@ -180,6 +180,21 @@ void do_vfp_get_fpscr(void)
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -191,6 +206,7 @@ void do_vfp_get_fpscr(void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-arm/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-arm/op_mem.h,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 op_mem.h
--- target-arm/op_mem.h	30 Apr 2007 02:02:16 -0000	1.2
+++ target-arm/op_mem.h	13 Oct 2007 14:09:56 -0000
@@ -1,18 +1,17 @@
 /* ARM memory operations.  */
 
-/* Load from address T1 into T0.  */
-#define MEM_LD_OP(name) \
+#define MEM_LD_OP(name, lname) \
 void OPPROTO glue(op_ld##name,MEMSUFFIX)(void) \
 { \
-    T0 = glue(ld##name,MEMSUFFIX)(T1); \
+    T0 = glue(ld##lname,MEMSUFFIX)(T1); \
     FORCE_RET(); \
 }
 
-MEM_LD_OP(ub)
-MEM_LD_OP(sb)
-MEM_LD_OP(uw)
-MEM_LD_OP(sw)
-MEM_LD_OP(l)
+MEM_LD_OP(ub,ub)
+MEM_LD_OP(sb,sb)
+MEM_LD_OP(uw,uw)
+MEM_LD_OP(sw,sw)
+MEM_LD_OP(l,ul)
 
 #undef MEM_LD_OP
 
@@ -45,7 +44,7 @@ void OPPROTO glue(op_swp##name,MEMSUFFIX
 }
 
 MEM_SWP_OP(b, ub)
-MEM_SWP_OP(l, l)
+MEM_SWP_OP(l, ul)
 
 #undef MEM_SWP_OP
 
@@ -82,7 +81,7 @@ void OPPROTO glue(op_iwmmxt_st##name,MEM
 
 MMX_MEM_OP(b, ub)
 MMX_MEM_OP(w, uw)
-MMX_MEM_OP(l, l)
+MMX_MEM_OP(l, ul)
 MMX_MEM_OP(q, q)
 
 #undef MMX_MEM_OP
Index: target-arm/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-arm/translate.c,v
retrieving revision 1.57
diff -u -d -d -p -r1.57 translate.c
--- target-arm/translate.c	17 Sep 2007 08:09:51 -0000	1.57
+++ target-arm/translate.c	13 Oct 2007 14:09:56 -0000
@@ -2206,7 +2206,7 @@ static void disas_arm_insn(CPUState * en
 {
     unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
 
-    insn = ldl_code(s->pc);
+    insn = ldul_code(s->pc);
     s->pc += 4;
 
     cond = insn >> 28;
Index: target-cris/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-cris/exec.h,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 exec.h
--- target-cris/exec.h	8 Oct 2007 13:04:02 -0000	1.1
+++ target-cris/exec.h	13 Oct 2007 14:09:56 -0000
@@ -50,6 +50,9 @@ void tlb_fill (target_ulong addr, int is
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 void cpu_cris_flush_flags(CPUCRISState *env, int cc_op);
Index: target-cris/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-cris/helper.c,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 helper.c
--- target-cris/helper.c	8 Oct 2007 13:04:02 -0000	1.1
+++ target-cris/helper.c	13 Oct 2007 14:09:56 -0000
@@ -106,7 +106,7 @@ void do_interrupt(CPUState *env)
 //			printf ("BREAK! %d\n", env->trapnr);
 			irqnum = env->trapnr;
 			ebp = env->pregs[SR_EBP];
-			isr = ldl_code(ebp + irqnum * 4);
+			isr = ldul_code(ebp + irqnum * 4);
 			env->pregs[SR_ERP] = env->pc + 2;
 			env->pc = isr;
 
@@ -117,7 +117,7 @@ void do_interrupt(CPUState *env)
 //			printf ("MMU miss\n");
 			irqnum = 4;
 			ebp = env->pregs[SR_EBP];
-			isr = ldl_code(ebp + irqnum * 4);
+			isr = ldul_code(ebp + irqnum * 4);
 			env->pregs[SR_ERP] = env->pc;
 			env->pc = isr;
 			cris_shift_ccs(env);
@@ -138,7 +138,7 @@ void do_interrupt(CPUState *env)
 					__builtin_clz(env->pending_interrupts);
 				irqnum += 0x30;
 				ebp = env->pregs[SR_EBP];
-				isr = ldl_code(ebp + irqnum * 4);
+				isr = ldul_code(ebp + irqnum * 4);
 				env->pregs[SR_ERP] = env->pc;
 				env->pc = isr;
 
Index: target-cris/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-cris/op_helper.c,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 op_helper.c
--- target-cris/op_helper.c	8 Oct 2007 13:04:02 -0000	1.1
+++ target-cris/op_helper.c	13 Oct 2007 14:09:56 -0000
@@ -25,6 +25,21 @@
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -36,6 +51,7 @@
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-cris/op_mem.c
===================================================================
RCS file: /sources/qemu/qemu/target-cris/op_mem.c,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 op_mem.c
--- target-cris/op_mem.c	8 Oct 2007 13:04:02 -0000	1.1
+++ target-cris/op_mem.c	13 Oct 2007 14:09:56 -0000
@@ -49,7 +49,7 @@ void glue(op_stw_T0_T1, MEMSUFFIX) (void
 }
 
 void glue(op_ldl_T0_T0, MEMSUFFIX) (void) {
-    T0 = glue(ldl, MEMSUFFIX) (T0);
+    T0 = glue(ldul, MEMSUFFIX) (T0);
     RETURN();
 }
 
Index: target-cris/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-cris/translate.c,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 translate.c
--- target-cris/translate.c	8 Oct 2007 12:49:08 -0000	1.1
+++ target-cris/translate.c	13 Oct 2007 14:09:56 -0000
@@ -828,7 +828,7 @@ static int dec_prep_alu_m(DisasContext *
 		if (memsize == 1)
 			insn_len++;
 
-		imm = ldl_code(dc->pc + 2);
+		imm = ldul_code(dc->pc + 2);
 		if (memsize != 4) {
 			if (s_ext) {
 				imm = sign_extend(imm, (memsize * 8) - 1);
@@ -1962,7 +1962,7 @@ static unsigned int dec_lapc_im(DisasCon
 	rd = dc->op2;
 
 	cris_cc_mask(dc, 0);
-	imm = ldl_code(dc->pc + 2);
+	imm = ldul_code(dc->pc + 2);
 	DIS(fprintf (logfile, "lapc 0x%x, $r%u\n", imm + dc->pc, dc->op2));
 	gen_op_movl_T0_im (dc->pc + imm);
 	gen_movl_reg_T0[rd] ();
@@ -1999,7 +1999,7 @@ static unsigned int dec_jas_im(DisasCont
 {
 	uint32_t imm;
 
-	imm = ldl_code(dc->pc + 2);
+	imm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "jas 0x%x\n", imm));
 	cris_cc_mask(dc, 0);
@@ -2016,7 +2016,7 @@ static unsigned int dec_jasc_im(DisasCon
 {
 	uint32_t imm;
 
-	imm = ldl_code(dc->pc + 2);
+	imm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "jasc 0x%x\n", imm));
 	cris_cc_mask(dc, 0);
@@ -2047,7 +2047,7 @@ static unsigned int dec_bcc_im(DisasCont
 	int32_t offset;
 	uint32_t cond = dc->op2;
 
-	offset = ldl_code(dc->pc + 2);
+	offset = ldul_code(dc->pc + 2);
 	offset = sign_extend(offset, 15);
 
 	DIS(fprintf (logfile, "b%s %d pc=%x dst=%x\n",
@@ -2065,7 +2065,7 @@ static unsigned int dec_bas_im(DisasCont
 	int32_t simm;
 
 
-	simm = ldl_code(dc->pc + 2);
+	simm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "bas 0x%x, $p%u\n", dc->pc + simm, dc->op2));
 	cris_cc_mask(dc, 0);
@@ -2081,7 +2081,7 @@ static unsigned int dec_bas_im(DisasCont
 static unsigned int dec_basc_im(DisasContext *dc)
 {
 	int32_t simm;
-	simm = ldl_code(dc->pc + 2);
+	simm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "basc 0x%x, $p%u\n", dc->pc + simm, dc->op2));
 	cris_cc_mask(dc, 0);
@@ -2259,7 +2259,7 @@ cris_decoder(DisasContext *dc)
 	int i;
 
 	/* Load a halfword onto the instruction register.  */
-	tmp = ldl_code(dc->pc);
+	tmp = ldul_code(dc->pc);
 	dc->ir = tmp & 0xffff;
 
 	/* Now decode it.  */
Index: target-i386/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-i386/exec.h,v
retrieving revision 1.37
diff -u -d -d -p -r1.37 exec.h
--- target-i386/exec.h	23 Sep 2007 15:28:04 -0000	1.37
+++ target-i386/exec.h	13 Oct 2007 14:09:56 -0000
@@ -217,6 +217,9 @@ void check_iol_DX(void);
 #if !defined(CONFIG_USER_ONLY)
 
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 
 static inline double ldfq(target_ulong ptr)
 {
@@ -244,7 +247,7 @@ static inline float ldfl(target_ulong pt
         float f;
         uint32_t i;
     } u;
-    u.i = ldl(ptr);
+    u.i = ldul(ptr);
     return u.f;
 }
 
@@ -419,12 +422,12 @@ static inline void helper_fstt(CPU86_LDo
 
 static inline CPU86_LDouble helper_fldt(target_ulong ptr)
 {
-    return *(CPU86_LDouble *)ptr;
+    return *(CPU86_LDouble *)(unsigned long)ptr;
 }
 
 static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr)
 {
-    *(CPU86_LDouble *)ptr = f;
+    *(CPU86_LDouble *)(unsigned long)ptr = f;
 }
 
 #else
Index: target-i386/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/helper.c,v
retrieving revision 1.89
diff -u -d -d -p -r1.89 helper.c
--- target-i386/helper.c	27 Sep 2007 01:52:00 -0000	1.89
+++ target-i386/helper.c	13 Oct 2007 14:09:57 -0000
@@ -122,8 +122,8 @@ static inline int load_segment(uint32_t 
     if ((index + 7) > dt->limit)
         return -1;
     ptr = dt->base + index;
-    *e1_ptr = ldl_kernel(ptr);
-    *e2_ptr = ldl_kernel(ptr + 4);
+    *e1_ptr = ldul_kernel(ptr);
+    *e2_ptr = ldul_kernel(ptr + 4);
     return 0;
 }
 
@@ -186,7 +186,7 @@ static inline void get_ss_esp_from_tss(u
         *esp_ptr = lduw_kernel(env->tr.base + index);
         *ss_ptr = lduw_kernel(env->tr.base + index + 2);
     } else {
-        *esp_ptr = ldl_kernel(env->tr.base + index);
+        *esp_ptr = ldul_kernel(env->tr.base + index);
         *ss_ptr = lduw_kernel(env->tr.base + index + 4);
     }
 }
@@ -302,15 +302,15 @@ static void switch_tss(int tss_selector,
     /* read all the registers from the new TSS */
     if (type & 8) {
         /* 32 bit */
-        new_cr3 = ldl_kernel(tss_base + 0x1c);
-        new_eip = ldl_kernel(tss_base + 0x20);
-        new_eflags = ldl_kernel(tss_base + 0x24);
+        new_cr3 = ldul_kernel(tss_base + 0x1c);
+        new_eip = ldul_kernel(tss_base + 0x20);
+        new_eflags = ldul_kernel(tss_base + 0x24);
         for(i = 0; i < 8; i++)
-            new_regs[i] = ldl_kernel(tss_base + (0x28 + i * 4));
+            new_regs[i] = ldul_kernel(tss_base + (0x28 + i * 4));
         for(i = 0; i < 6; i++)
             new_segs[i] = lduw_kernel(tss_base + (0x48 + i * 4));
         new_ldt = lduw_kernel(tss_base + 0x60);
-        new_trap = ldl_kernel(tss_base + 0x64);
+        new_trap = ldul_kernel(tss_base + 0x64);
     } else {
         /* 16 bit */
         new_cr3 = 0;
@@ -341,7 +341,7 @@ static void switch_tss(int tss_selector,
         target_ulong ptr;
         uint32_t e2;
         ptr = env->gdt.base + (env->tr.selector & ~7);
-        e2 = ldl_kernel(ptr + 4);
+        e2 = ldul_kernel(ptr + 4);
         e2 &= ~DESC_TSS_BUSY_MASK;
         stl_kernel(ptr + 4, e2);
     }
@@ -393,7 +393,7 @@ static void switch_tss(int tss_selector,
         target_ulong ptr;
         uint32_t e2;
         ptr = env->gdt.base + (tss_selector & ~7);
-        e2 = ldl_kernel(ptr + 4);
+        e2 = ldul_kernel(ptr + 4);
         e2 |= DESC_TSS_BUSY_MASK;
         stl_kernel(ptr + 4, e2);
     }
@@ -456,8 +456,8 @@ static void switch_tss(int tss_selector,
         if ((index + 7) > dt->limit)
             raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
         if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
             raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
         if (!(e2 & DESC_P_MASK))
@@ -580,7 +580,7 @@ do {\
 
 #define POPL(ssp, sp, sp_mask, val)\
 {\
-    val = (uint32_t)ldl_kernel((ssp) + (sp & (sp_mask)));\
+    val = (uint32_t)ldul_kernel((ssp) + (sp & (sp_mask)));\
     sp += 4;\
 }
 
@@ -629,8 +629,8 @@ static void do_interrupt_protected(int i
     if (intno * 8 + 7 > dt->limit)
         raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
     ptr = dt->base + intno * 8;
-    e1 = ldl_kernel(ptr);
-    e2 = ldl_kernel(ptr + 4);
+    e1 = ldul_kernel(ptr);
+    e2 = ldul_kernel(ptr + 4);
     /* check gate type */
     type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
     switch(type) {
@@ -875,9 +875,9 @@ static void do_interrupt64(int intno, in
     if (intno * 16 + 15 > dt->limit)
         raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
     ptr = dt->base + intno * 16;
-    e1 = ldl_kernel(ptr);
-    e2 = ldl_kernel(ptr + 4);
-    e3 = ldl_kernel(ptr + 8);
+    e1 = ldul_kernel(ptr);
+    e2 = ldul_kernel(ptr + 4);
+    e3 = ldul_kernel(ptr + 8);
     /* check gate type */
     type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
     switch(type) {
@@ -1147,7 +1147,7 @@ void do_interrupt_user(int intno, int is
 
     dt = &env->idt;
     ptr = dt->base + (intno * 8);
-    e2 = ldl_kernel(ptr + 4);
+    e2 = ldul_kernel(ptr + 4);
 
     dpl = (e2 >> DESC_DPL_SHIFT) & 3;
     cpl = env->hflags & HF_CPL_MASK;
@@ -1469,24 +1469,24 @@ void helper_rsm(void)
         cpu_x86_load_seg_cache(env, i,
                                lduw_phys(sm_state + offset),
                                ldq_phys(sm_state + offset + 8),
-                               ldl_phys(sm_state + offset + 4),
+                               ldul_phys(sm_state + offset + 4),
                                (lduw_phys(sm_state + offset + 2) & 0xf0ff) << 8);
     }
 
     env->gdt.base = ldq_phys(sm_state + 0x7e68);
-    env->gdt.limit = ldl_phys(sm_state + 0x7e64);
+    env->gdt.limit = ldul_phys(sm_state + 0x7e64);
 
     env->ldt.selector = lduw_phys(sm_state + 0x7e70);
     env->ldt.base = ldq_phys(sm_state + 0x7e78);
-    env->ldt.limit = ldl_phys(sm_state + 0x7e74);
+    env->ldt.limit = ldul_phys(sm_state + 0x7e74);
     env->ldt.flags = (lduw_phys(sm_state + 0x7e72) & 0xf0ff) << 8;
 
     env->idt.base = ldq_phys(sm_state + 0x7e88);
-    env->idt.limit = ldl_phys(sm_state + 0x7e84);
+    env->idt.limit = ldul_phys(sm_state + 0x7e84);
 
     env->tr.selector = lduw_phys(sm_state + 0x7e90);
     env->tr.base = ldq_phys(sm_state + 0x7e98);
-    env->tr.limit = ldl_phys(sm_state + 0x7e94);
+    env->tr.limit = ldul_phys(sm_state + 0x7e94);
     env->tr.flags = (lduw_phys(sm_state + 0x7e92) & 0xf0ff) << 8;
 
     EAX = ldq_phys(sm_state + 0x7ff8);
@@ -1500,51 +1500,51 @@ void helper_rsm(void)
     for(i = 8; i < 16; i++)
         env->regs[i] = ldq_phys(sm_state + 0x7ff8 - i * 8);
     env->eip = ldq_phys(sm_state + 0x7f78);
-    load_eflags(ldl_phys(sm_state + 0x7f70),
+    load_eflags(ldul_phys(sm_state + 0x7f70),
                 ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    env->dr[6] = ldl_phys(sm_state + 0x7f68);
-    env->dr[7] = ldl_phys(sm_state + 0x7f60);
+    env->dr[6] = ldul_phys(sm_state + 0x7f68);
+    env->dr[7] = ldul_phys(sm_state + 0x7f60);
 
-    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f48));
-    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7f50));
-    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7f58));
+    cpu_x86_update_cr4(env, ldul_phys(sm_state + 0x7f48));
+    cpu_x86_update_cr3(env, ldul_phys(sm_state + 0x7f50));
+    cpu_x86_update_cr0(env, ldul_phys(sm_state + 0x7f58));
 
-    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+    val = ldul_phys(sm_state + 0x7efc); /* revision ID */
     if (val & 0x20000) {
-        env->smbase = ldl_phys(sm_state + 0x7f00) & ~0x7fff;
+        env->smbase = ldul_phys(sm_state + 0x7f00) & ~0x7fff;
     }
 #else
-    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7ffc));
-    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7ff8));
-    load_eflags(ldl_phys(sm_state + 0x7ff4),
+    cpu_x86_update_cr0(env, ldul_phys(sm_state + 0x7ffc));
+    cpu_x86_update_cr3(env, ldul_phys(sm_state + 0x7ff8));
+    load_eflags(ldul_phys(sm_state + 0x7ff4),
                 ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    env->eip = ldl_phys(sm_state + 0x7ff0);
-    EDI = ldl_phys(sm_state + 0x7fec);
-    ESI = ldl_phys(sm_state + 0x7fe8);
-    EBP = ldl_phys(sm_state + 0x7fe4);
-    ESP = ldl_phys(sm_state + 0x7fe0);
-    EBX = ldl_phys(sm_state + 0x7fdc);
-    EDX = ldl_phys(sm_state + 0x7fd8);
-    ECX = ldl_phys(sm_state + 0x7fd4);
-    EAX = ldl_phys(sm_state + 0x7fd0);
-    env->dr[6] = ldl_phys(sm_state + 0x7fcc);
-    env->dr[7] = ldl_phys(sm_state + 0x7fc8);
+    env->eip = ldul_phys(sm_state + 0x7ff0);
+    EDI = ldul_phys(sm_state + 0x7fec);
+    ESI = ldul_phys(sm_state + 0x7fe8);
+    EBP = ldul_phys(sm_state + 0x7fe4);
+    ESP = ldul_phys(sm_state + 0x7fe0);
+    EBX = ldul_phys(sm_state + 0x7fdc);
+    EDX = ldul_phys(sm_state + 0x7fd8);
+    ECX = ldul_phys(sm_state + 0x7fd4);
+    EAX = ldul_phys(sm_state + 0x7fd0);
+    env->dr[6] = ldul_phys(sm_state + 0x7fcc);
+    env->dr[7] = ldul_phys(sm_state + 0x7fc8);
 
-    env->tr.selector = ldl_phys(sm_state + 0x7fc4) & 0xffff;
-    env->tr.base = ldl_phys(sm_state + 0x7f64);
-    env->tr.limit = ldl_phys(sm_state + 0x7f60);
-    env->tr.flags = (ldl_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
+    env->tr.selector = ldul_phys(sm_state + 0x7fc4) & 0xffff;
+    env->tr.base = ldul_phys(sm_state + 0x7f64);
+    env->tr.limit = ldul_phys(sm_state + 0x7f60);
+    env->tr.flags = (ldul_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
 
-    env->ldt.selector = ldl_phys(sm_state + 0x7fc0) & 0xffff;
-    env->ldt.base = ldl_phys(sm_state + 0x7f80);
-    env->ldt.limit = ldl_phys(sm_state + 0x7f7c);
-    env->ldt.flags = (ldl_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
+    env->ldt.selector = ldul_phys(sm_state + 0x7fc0) & 0xffff;
+    env->ldt.base = ldul_phys(sm_state + 0x7f80);
+    env->ldt.limit = ldul_phys(sm_state + 0x7f7c);
+    env->ldt.flags = (ldul_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
 
-    env->gdt.base = ldl_phys(sm_state + 0x7f74);
-    env->gdt.limit = ldl_phys(sm_state + 0x7f70);
+    env->gdt.base = ldul_phys(sm_state + 0x7f74);
+    env->gdt.limit = ldul_phys(sm_state + 0x7f70);
 
-    env->idt.base = ldl_phys(sm_state + 0x7f58);
-    env->idt.limit = ldl_phys(sm_state + 0x7f54);
+    env->idt.base = ldul_phys(sm_state + 0x7f58);
+    env->idt.limit = ldul_phys(sm_state + 0x7f54);
 
     for(i = 0; i < 6; i++) {
         if (i < 3)
@@ -1552,16 +1552,16 @@ void helper_rsm(void)
         else
             offset = 0x7f2c + (i - 3) * 12;
         cpu_x86_load_seg_cache(env, i,
-                               ldl_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
-                               ldl_phys(sm_state + offset + 8),
-                               ldl_phys(sm_state + offset + 4),
-                               (ldl_phys(sm_state + offset) & 0xf0ff) << 8);
+                               ldul_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
+                               ldul_phys(sm_state + offset + 8),
+                               ldul_phys(sm_state + offset + 4),
+                               (ldul_phys(sm_state + offset) & 0xf0ff) << 8);
     }
-    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f14));
+    cpu_x86_update_cr4(env, ldul_phys(sm_state + 0x7f14));
 
-    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+    val = ldul_phys(sm_state + 0x7efc); /* revision ID */
     if (val & 0x20000) {
-        env->smbase = ldl_phys(sm_state + 0x7ef8) & ~0x7fff;
+        env->smbase = ldul_phys(sm_state + 0x7ef8) & ~0x7fff;
     }
 #endif
     CC_OP = CC_OP_EFLAGS;
@@ -1761,7 +1761,7 @@ void helper_enter_level(int level, int d
         while (--level) {
             esp -= 4;
             ebp -= 4;
-            stl(ssp + (esp & esp_mask), ldl(ssp + (ebp & esp_mask)));
+            stl(ssp + (esp & esp_mask), ldul(ssp + (ebp & esp_mask)));
         }
         esp -= 4;
         stl(ssp + (esp & esp_mask), T1);
@@ -1836,8 +1836,8 @@ void helper_lldt_T0(void)
         if ((index + entry_limit) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
         if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         if (!(e2 & DESC_P_MASK))
@@ -1845,7 +1845,7 @@ void helper_lldt_T0(void)
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
             uint32_t e3;
-            e3 = ldl_kernel(ptr + 8);
+            e3 = ldul_kernel(ptr + 8);
             load_seg_cache_raw_dt(&env->ldt, e1, e2);
             env->ldt.base |= (target_ulong)e3 << 32;
         } else
@@ -1885,8 +1885,8 @@ void helper_ltr_T0(void)
         if ((index + entry_limit) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
         type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
         if ((e2 & DESC_S_MASK) ||
             (type != 1 && type != 9))
@@ -1896,8 +1896,8 @@ void helper_ltr_T0(void)
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
             uint32_t e3, e4;
-            e3 = ldl_kernel(ptr + 8);
-            e4 = ldl_kernel(ptr + 12);
+            e3 = ldul_kernel(ptr + 8);
+            e4 = ldul_kernel(ptr + 12);
             if ((e4 >> DESC_TYPE_SHIFT) & 0xf)
                 raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
             load_seg_cache_raw_dt(&env->tr, e1, e2);
@@ -1943,8 +1943,8 @@ void load_seg(int seg_reg, int selector)
         if ((index + 7) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
 
         if (!(e2 & DESC_S_MASK))
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
@@ -2273,7 +2273,7 @@ void helper_lcall_protected_T0_T1(int sh
                 PUSHL(ssp, sp, sp_mask, env->segs[R_SS].selector);
                 PUSHL(ssp, sp, sp_mask, ESP);
                 for(i = param_count - 1; i >= 0; i--) {
-                    val = ldl_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask));
+                    val = ldul_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask));
                     PUSHL(ssp, sp, sp_mask, val);
                 }
             } else {
@@ -3569,8 +3569,8 @@ void helper_fxrstor(target_ulong ptr, in
 
     if (env->cr[4] & CR4_OSFXSR_MASK) {
         /* XXX: finish it */
-        env->mxcsr = ldl(ptr + 0x18);
-        //ldl(ptr + 0x1c);
+        env->mxcsr = ldul(ptr + 0x18);
+        //ldul(ptr + 0x1c);
         nb_xmm_regs = 8 << data64;
         addr = ptr + 0xa0;
         for(i = 0; i < nb_xmm_regs; i++) {
@@ -3867,6 +3867,7 @@ void update_fp_status(void)
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -3879,6 +3880,21 @@ void update_fp_status(void)
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 #endif
 
 /* try to fill the TLB and return an exception if error. If retaddr is
@@ -4010,13 +4026,13 @@ void helper_vmrun(target_ulong addr)
     env->intercept_cr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_write));
     env->intercept_dr_read    = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_read));
     env->intercept_dr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_write));
-    env->intercept_exceptions = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions));
+    env->intercept_exceptions = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions));
 
     env->gdt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base));
-    env->gdt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit));
+    env->gdt.limit = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit));
 
     env->idt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base));
-    env->idt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit));
+    env->idt.limit = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit));
 
     /* clear exit_info_2 so we behave like the real hardware */
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 0);
@@ -4025,7 +4041,7 @@ void helper_vmrun(target_ulong addr)
     cpu_x86_update_cr4(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4)));
     cpu_x86_update_cr3(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3)));
     env->cr[2] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2));
-    int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
+    int_ctl = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
     if (int_ctl & V_INTR_MASKING_MASK) {
         env->cr[8] = int_ctl & V_TPR_MASK;
         if (env->eflags & IF_MASK)
@@ -4073,11 +4089,11 @@ void helper_vmrun(target_ulong addr)
     regs_to_env();
 
     /* maybe we need to inject an event */
-    event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+    event_inj = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
     if (event_inj & SVM_EVTINJ_VALID) {
         uint8_t vector = event_inj & SVM_EVTINJ_VEC_MASK;
         uint16_t valid_err = event_inj & SVM_EVTINJ_VALID_ERR;
-        uint32_t event_inj_err = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
+        uint32_t event_inj_err = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
         stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
 
         if (loglevel & CPU_LOG_TB_IN_ASM)
@@ -4309,7 +4325,7 @@ void vmexit(uint64_t exit_code, uint64_t
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3), env->cr[3]);
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4), env->cr[4]);
 
-    if ((int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl))) & V_INTR_MASKING_MASK) {
+    if ((int_ctl = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl))) & V_INTR_MASKING_MASK) {
         int_ctl &= ~V_TPR_MASK;
         int_ctl |= env->cr[8] & V_TPR_MASK;
         stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl), int_ctl);
@@ -4330,10 +4346,10 @@ void vmexit(uint64_t exit_code, uint64_t
     env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
 
     env->gdt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base));
-    env->gdt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit));
+    env->gdt.limit = ldul_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit));
 
     env->idt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base));
-    env->idt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit));
+    env->idt.limit = ldul_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit));
 
     cpu_x86_update_cr0(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0)) | CR0_PE_MASK);
     cpu_x86_update_cr4(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4)));
Index: target-i386/helper2.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/helper2.c,v
retrieving revision 1.52
diff -u -d -d -p -r1.52 helper2.c
--- target-i386/helper2.c	23 Sep 2007 15:28:04 -0000	1.52
+++ target-i386/helper2.c	13 Oct 2007 14:09:57 -0000
@@ -771,7 +771,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
         /* page directory entry */
         pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) &
             env->a20_mask;
-        pde = ldl_phys(pde_addr);
+        pde = ldul_phys(pde_addr);
         if (!(pde & PG_PRESENT_MASK)) {
             error_code = 0;
             goto do_fault;
@@ -809,7 +809,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
             /* page directory entry */
             pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
                 env->a20_mask;
-            pte = ldl_phys(pte_addr);
+            pte = ldul_phys(pte_addr);
             if (!(pte & PG_PRESENT_MASK)) {
                 error_code = 0;
                 goto do_fault;
@@ -909,13 +909,13 @@ target_phys_addr_t cpu_get_phys_page_deb
 
             pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
                 env->a20_mask;
-            pml4e = ldl_phys(pml4e_addr);
+            pml4e = ldul_phys(pml4e_addr);
             if (!(pml4e & PG_PRESENT_MASK))
                 return -1;
 
             pdpe_addr = ((pml4e & ~0xfff) + (((addr >> 30) & 0x1ff) << 3)) &
                 env->a20_mask;
-            pdpe = ldl_phys(pdpe_addr);
+            pdpe = ldul_phys(pdpe_addr);
             if (!(pdpe & PG_PRESENT_MASK))
                 return -1;
         } else
@@ -923,14 +923,14 @@ target_phys_addr_t cpu_get_phys_page_deb
         {
             pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) &
                 env->a20_mask;
-            pdpe = ldl_phys(pdpe_addr);
+            pdpe = ldul_phys(pdpe_addr);
             if (!(pdpe & PG_PRESENT_MASK))
                 return -1;
         }
 
         pde_addr = ((pdpe & ~0xfff) + (((addr >> 21) & 0x1ff) << 3)) &
             env->a20_mask;
-        pde = ldl_phys(pde_addr);
+        pde = ldul_phys(pde_addr);
         if (!(pde & PG_PRESENT_MASK)) {
             return -1;
         }
@@ -943,7 +943,7 @@ target_phys_addr_t cpu_get_phys_page_deb
             pte_addr = ((pde & ~0xfff) + (((addr >> 12) & 0x1ff) << 3)) &
                 env->a20_mask;
             page_size = 4096;
-            pte = ldl_phys(pte_addr);
+            pte = ldul_phys(pte_addr);
         }
     } else {
         if (!(env->cr[0] & CR0_PG_MASK)) {
@@ -952,7 +952,7 @@ target_phys_addr_t cpu_get_phys_page_deb
         } else {
             /* page directory entry */
             pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask;
-            pde = ldl_phys(pde_addr);
+            pde = ldul_phys(pde_addr);
             if (!(pde & PG_PRESENT_MASK))
                 return -1;
             if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
@@ -961,7 +961,7 @@ target_phys_addr_t cpu_get_phys_page_deb
             } else {
                 /* page directory entry */
                 pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask;
-                pte = ldl_phys(pte_addr);
+                pte = ldul_phys(pte_addr);
                 if (!(pte & PG_PRESENT_MASK))
                     return -1;
                 page_size = 4096;
Index: target-i386/op.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/op.c,v
retrieving revision 1.51
diff -u -d -d -p -r1.51 op.c
--- target-i386/op.c	23 Sep 2007 15:28:04 -0000	1.51
+++ target-i386/op.c	13 Oct 2007 14:09:57 -0000
@@ -716,8 +716,8 @@ void OPPROTO op_boundw(void)
 void OPPROTO op_boundl(void)
 {
     int low, high, v;
-    low = ldl(A0);
-    high = ldl(A0 + 4);
+    low = ldul(A0);
+    high = ldul(A0 + 4);
     v = T0;
     if (v < low || v > high) {
         raise_exception(EXCP05_BOUND);
@@ -747,8 +747,6 @@ void OPPROTO op_exit_tb(void)
 
 /* multiple size ops */
 
-#define ldul ldl
-
 #define SHIFT 0
 #include "ops_template.h"
 #undef SHIFT
@@ -1688,7 +1686,7 @@ CCTable cc_table[CC_OP_NB] = {
 void OPPROTO op_flds_FT0_A0(void)
 {
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = ldl(A0);
+    FP_CONVERT.i32 = ldul(A0);
     FT0 = FP_CONVERT.f;
 #else
     FT0 = ldfl(A0);
@@ -1715,7 +1713,7 @@ void helper_fild_FT0_A0(void)
 
 void helper_fildl_FT0_A0(void)
 {
-    FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
+    FT0 = (CPU86_LDouble)((int32_t)ldul(A0));
 }
 
 void helper_fildll_FT0_A0(void)
@@ -1753,10 +1751,10 @@ void OPPROTO op_fild_FT0_A0(void)
 void OPPROTO op_fildl_FT0_A0(void)
 {
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = (int32_t) ldl(A0);
+    FP_CONVERT.i32 = (int32_t) ldul(A0);
     FT0 = (CPU86_LDouble)FP_CONVERT.i32;
 #else
-    FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
+    FT0 = (CPU86_LDouble)((int32_t)ldul(A0));
 #endif
 }
 
@@ -1778,7 +1776,7 @@ void OPPROTO op_flds_ST0_A0(void)
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = ldl(A0);
+    FP_CONVERT.i32 = ldul(A0);
     env->fpregs[new_fpstt].d = FP_CONVERT.f;
 #else
     env->fpregs[new_fpstt].d = ldfl(A0);
@@ -1822,7 +1820,7 @@ void helper_fildl_ST0_A0(void)
 {
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
-    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0));
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldul(A0));
     env->fpstt = new_fpstt;
     env->fptags[new_fpstt] = 0; /* validate stack entry */
 }
@@ -1872,10 +1870,10 @@ void OPPROTO op_fildl_ST0_A0(void)
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = (int32_t) ldl(A0);
+    FP_CONVERT.i32 = (int32_t) ldul(A0);
     env->fpregs[new_fpstt].d = (CPU86_LDouble)FP_CONVERT.i32;
 #else
-    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0));
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldul(A0));
 #endif
     env->fpstt = new_fpstt;
     env->fptags[new_fpstt] = 0; /* validate stack entry */
Index: target-i386/ops_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-i386/ops_mem.h,v
retrieving revision 1.7
diff -u -d -d -p -r1.7 ops_mem.h
--- target-i386/ops_mem.h	28 Nov 2005 21:02:17 -0000	1.7
+++ target-i386/ops_mem.h	13 Oct 2007 14:09:57 -0000
@@ -20,7 +20,7 @@ void OPPROTO glue(glue(op_ldsw, MEMSUFFI
 
 void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T0_A0)(void)
 {
-    T0 = (uint32_t)glue(ldl, MEMSUFFIX)(A0);
+    T0 = (uint32_t)glue(ldul, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_ldub, MEMSUFFIX), _T1_A0)(void)
@@ -45,7 +45,7 @@ void OPPROTO glue(glue(op_ldsw, MEMSUFFI
 
 void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T1_A0)(void)
 {
-    T1 = (uint32_t)glue(ldl, MEMSUFFIX)(A0);
+    T1 = (uint32_t)glue(ldul, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_stb, MEMSUFFIX), _T0_A0)(void)
@@ -122,12 +122,12 @@ void OPPROTO glue(glue(op_sto, MEMSUFFIX
 #ifdef TARGET_X86_64
 void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T0_A0)(void)
 {
-    T0 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
+    T0 = glue(ldsl, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T1_A0)(void)
 {
-    T1 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
+    T1 = glue(ldsl, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_ldq, MEMSUFFIX), _T0_A0)(void)
Index: target-i386/svm.h
===================================================================
RCS file: /sources/qemu/qemu/target-i386/svm.h,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 svm.h
--- target-i386/svm.h	23 Sep 2007 15:30:28 -0000	1.1
+++ target-i386/svm.h	13 Oct 2007 14:09:57 -0000
@@ -340,13 +340,13 @@ static inline int svm_check_intercept(un
                     R_##seg_index, \
                     lduw_phys(addr + offsetof(struct vmcb, save.seg.selector)),\
                     ldq_phys(addr + offsetof(struct vmcb, save.seg.base)),\
-                    ldl_phys(addr + offsetof(struct vmcb, save.seg.limit)),\
-                    vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg.attrib)), ldq_phys(addr + offsetof(struct vmcb, save.seg.base)), ldl_phys(addr + offsetof(struct vmcb, save.seg.limit))))
+                    ldul_phys(addr + offsetof(struct vmcb, save.seg.limit)),\
+                    vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg.attrib)), ldq_phys(addr + offsetof(struct vmcb, save.seg.base)), ldul_phys(addr + offsetof(struct vmcb, save.seg.limit))))
 
 #define SVM_LOAD_SEG2(addr, seg_qemu, seg_vmcb) \
     env->seg_qemu.selector  = lduw_phys(addr + offsetof(struct vmcb, save.seg_vmcb.selector)); \
     env->seg_qemu.base      = ldq_phys(addr + offsetof(struct vmcb, save.seg_vmcb.base)); \
-    env->seg_qemu.limit     = ldl_phys(addr + offsetof(struct vmcb, save.seg_vmcb.limit)); \
+    env->seg_qemu.limit     = ldul_phys(addr + offsetof(struct vmcb, save.seg_vmcb.limit)); \
     env->seg_qemu.flags     = vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg_vmcb.attrib)), env->seg_qemu.base, env->seg_qemu.limit)
 
 #define SVM_SAVE_SEG(addr, seg_qemu, seg_vmcb) \
Index: target-i386/translate-copy.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/translate-copy.c,v
retrieving revision 1.9
diff -u -d -d -p -r1.9 translate-copy.c
--- target-i386/translate-copy.c	17 Sep 2007 08:09:52 -0000	1.9
+++ target-i386/translate-copy.c	13 Oct 2007 14:09:57 -0000
@@ -207,7 +207,7 @@ static inline void gen_lea_modrm(DisasCo
         case 0:
             if (base == 5) {
                 base = -1;
-                disp = ldl_code(s->pc);
+                disp = ldul_code(s->pc);
                 s->pc += 4;
             } else {
                 disp = 0;
@@ -218,7 +218,7 @@ static inline void gen_lea_modrm(DisasCo
             break;
         default:
         case 2:
-            disp = ldl_code(s->pc);
+            disp = ldul_code(s->pc);
             s->pc += 4;
             break;
         }
@@ -266,7 +266,7 @@ static inline uint32_t insn_get(DisasCon
         break;
     default:
     case OT_LONG:
-        ret = ldl_code(s->pc);
+        ret = ldul_code(s->pc);
         s->pc += 4;
         break;
     }
Index: target-i386/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/translate.c,v
retrieving revision 1.72
diff -u -d -d -p -r1.72 translate.c
--- target-i386/translate.c	27 Sep 2007 01:52:00 -0000	1.72
+++ target-i386/translate.c	13 Oct 2007 14:09:58 -0000
@@ -1462,7 +1462,7 @@ static void gen_lea_modrm(DisasContext *
         case 0:
             if ((base & 7) == 5) {
                 base = -1;
-                disp = (int32_t)ldl_code(s->pc);
+                disp = (int32_t)ldul_code(s->pc);
                 s->pc += 4;
                 if (CODE64(s) && !havesib) {
                     disp += s->pc + s->rip_offset;
@@ -1476,7 +1476,7 @@ static void gen_lea_modrm(DisasContext *
             break;
         default:
         case 2:
-            disp = ldl_code(s->pc);
+            disp = ldul_code(s->pc);
             s->pc += 4;
             break;
         }
@@ -1736,7 +1736,7 @@ static inline uint32_t insn_get(DisasCon
         break;
     default:
     case OT_LONG:
-        ret = ldl_code(s->pc);
+        ret = ldul_code(s->pc);
         s->pc += 4;
         break;
     }
Index: target-m68k/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-m68k/exec.h,v
retrieving revision 1.4
diff -u -d -d -p -r1.4 exec.h
--- target-m68k/exec.h	16 Sep 2007 21:08:03 -0000	1.4
+++ target-m68k/exec.h	13 Oct 2007 14:09:58 -0000
@@ -42,6 +42,9 @@ int cpu_m68k_handle_mmu_fault (CPUState 
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 void cpu_m68k_flush_flags(CPUM68KState *env, int cc_op);
Index: target-m68k/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-m68k/op_helper.c,v
retrieving revision 1.6
diff -u -d -d -p -r1.6 op_helper.c
--- target-m68k/op_helper.c	16 Sep 2007 21:08:03 -0000	1.6
+++ target-m68k/op_helper.c	13 Oct 2007 14:09:58 -0000
@@ -33,6 +33,21 @@ extern int semihosting_enabled;
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -44,6 +59,7 @@ extern int semihosting_enabled;
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
@@ -83,8 +99,8 @@ static void do_rte(void)
     uint32_t fmt;
 
     sp = env->aregs[7];
-    fmt = ldl_kernel(sp);
-    env->pc = ldl_kernel(sp + 4);
+    fmt = ldul_kernel(sp);
+    env->pc = ldul_kernel(sp + 4);
     sp |= (fmt >> 28) & 3;
     env->sr = fmt & 0xffff;
     m68k_switch_sp(env);
@@ -112,7 +128,7 @@ void do_interrupt(int is_hw)
                     && (env->sr & SR_S) != 0
                     && (env->pc & 3) == 0
                     && lduw_code(env->pc - 4) == 0x4e71
-                    && ldl_code(env->pc) == 0x4e7bf000) {
+                    && ldul_code(env->pc) == 0x4e7bf000) {
                 env->pc += 4;
                 do_m68k_semihosting(env, env->dregs[0]);
                 return;
@@ -153,7 +169,7 @@ void do_interrupt(int is_hw)
     stl_kernel(sp, fmt);
     env->aregs[7] = sp;
     /* Jump to vector.  */
-    env->pc = ldl_kernel(env->vbr + vector);
+    env->pc = ldul_kernel(env->vbr + vector);
 }
 
 #endif
Index: target-m68k/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-m68k/op_mem.h,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 op_mem.h
--- target-m68k/op_mem.h	23 May 2007 19:58:11 -0000	1.1
+++ target-m68k/op_mem.h	13 Oct 2007 14:09:58 -0000
@@ -11,7 +11,7 @@ MEM_LD_OP(8u32,ub)
 MEM_LD_OP(8s32,sb)
 MEM_LD_OP(16u32,uw)
 MEM_LD_OP(16s32,sw)
-MEM_LD_OP(32,l)
+MEM_LD_OP(32,ul)
 
 #undef MEM_LD_OP
 
Index: target-mips/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-mips/exec.h,v
retrieving revision 1.38
diff -u -d -d -p -r1.38 exec.h
--- target-mips/exec.h	9 Oct 2007 03:39:58 -0000	1.38
+++ target-mips/exec.h	13 Oct 2007 14:09:58 -0000
@@ -54,6 +54,9 @@ register target_ulong T2 asm(AREG3);
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 #if defined(TARGET_MIPSN32) || defined(TARGET_MIPS64)
Index: target-mips/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-mips/op_helper.c,v
retrieving revision 1.65
diff -u -d -d -p -r1.65 op_helper.c
--- target-mips/op_helper.c	9 Oct 2007 03:39:58 -0000	1.65
+++ target-mips/op_helper.c	13 Oct 2007 14:09:58 -0000
@@ -544,6 +544,21 @@ static void do_unaligned_access (target_
 #define MMUSUFFIX _mmu
 #define ALIGNED_ONLY
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -555,6 +570,7 @@ static void do_unaligned_access (target_
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 static void do_unaligned_access (target_ulong addr, int is_write, int is_user, void *retaddr)
 {
Index: target-mips/op_mem.c
===================================================================
RCS file: /sources/qemu/qemu/target-mips/op_mem.c,v
retrieving revision 1.14
diff -u -d -d -p -r1.14 op_mem.c
--- target-mips/op_mem.c	9 Oct 2007 03:12:08 -0000	1.14
+++ target-mips/op_mem.c	13 Oct 2007 14:09:58 -0000
@@ -57,13 +57,13 @@ void glue(op_sh, MEMSUFFIX) (void)
 
 void glue(op_lw, MEMSUFFIX) (void)
 {
-    T0 = glue(ldl, MEMSUFFIX)(T0);
+    T0 = glue(ldul, MEMSUFFIX)(T0);
     RETURN();
 }
 
 void glue(op_lwu, MEMSUFFIX) (void)
 {
-    T0 = (uint32_t)glue(ldl, MEMSUFFIX)(T0);
+    T0 = (uint32_t)glue(ldul, MEMSUFFIX)(T0);
     RETURN();
 }
 
@@ -167,7 +167,7 @@ void glue(op_swr, MEMSUFFIX) (void)
 void glue(op_ll, MEMSUFFIX) (void)
 {
     T1 = T0;
-    T0 = glue(ldl, MEMSUFFIX)(T0);
+    T0 = glue(ldul, MEMSUFFIX)(T0);
     env->CP0_LLAddr = T1;
     RETURN();
 }
@@ -383,7 +383,7 @@ void glue(op_scd, MEMSUFFIX) (void)
 
 void glue(op_lwc1, MEMSUFFIX) (void)
 {
-    WT0 = glue(ldl, MEMSUFFIX)(T0);
+    WT0 = glue(ldul, MEMSUFFIX)(T0);
     RETURN();
 }
 void glue(op_swc1, MEMSUFFIX) (void)
Index: target-mips/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-mips/translate.c,v
retrieving revision 1.106
diff -u -d -d -p -r1.106 translate.c
--- target-mips/translate.c	9 Oct 2007 03:39:58 -0000	1.106
+++ target-mips/translate.c	13 Oct 2007 14:09:58 -0000
@@ -6544,7 +6544,7 @@ gen_intermediate_code_internal (CPUState
             gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK;
             gen_opc_instr_start[lj] = 1;
         }
-        ctx.opcode = ldl_code(ctx.pc);
+        ctx.opcode = ldul_code(ctx.pc);
         decode_opc(env, &ctx);
         ctx.pc += 4;
 
Index: target-ppc/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/exec.h,v
retrieving revision 1.28
diff -u -d -d -p -r1.28 exec.h
--- target-ppc/exec.h	7 Oct 2007 18:19:25 -0000	1.28
+++ target-ppc/exec.h	13 Oct 2007 14:09:58 -0000
@@ -91,7 +91,12 @@ static always_inline target_ulong rotl64
 #endif
 
 #if !defined(CONFIG_USER_ONLY)
+
+#include "softmmu_exec.h"
+#define REVERSE_ENDIAN
 #include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
+
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 void do_raise_exception_err (uint32_t exception, int error_code);
Index: target-ppc/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/helper.c,v
retrieving revision 1.75
diff -u -d -d -p -r1.75 helper.c
--- target-ppc/helper.c	8 Oct 2007 02:58:07 -0000	1.75
+++ target-ppc/helper.c	13 Oct 2007 14:09:58 -0000
@@ -514,8 +514,8 @@ static always_inline int _find_pte (mmu_
         } else
 #endif
         {
-            pte0 = ldl_phys(base + (i * 8));
-            pte1 =  ldl_phys(base + (i * 8) + 4);
+            pte0 = ldul_phys(base + (i * 8));
+            pte1 =  ldul_phys(base + (i * 8) + 4);
             r = pte32_check(ctx, pte0, pte1, h, rw);
 #if defined (DEBUG_MMU)
             if (loglevel != 0) {
@@ -623,7 +623,7 @@ static int slb_lookup (CPUPPCState *env,
     slb_nr = env->slb_nr;
     for (n = 0; n < slb_nr; n++) {
         tmp64 = ldq_phys(sr_base);
-        tmp = ldl_phys(sr_base + 8);
+        tmp = ldul_phys(sr_base + 8);
 #if defined(DEBUG_SLB)
         if (loglevel != 0) {
             fprintf(logfile, "%s: seg %d " PADDRX " %016" PRIx64 " %08"
@@ -671,7 +671,7 @@ target_ulong ppc_load_slb (CPUPPCState *
     sr_base = env->spr[SPR_ASR];
     sr_base += 12 * slb_nr;
     tmp64 = ldq_phys(sr_base);
-    tmp = ldl_phys(sr_base + 8);
+    tmp = ldul_phys(sr_base + 8);
     if (tmp64 & 0x0000000008000000ULL) {
         /* SLB entry is valid */
         /* Copy SLB bits 62:88 to Rt 37:63 (VSID 23:49) */
@@ -877,10 +877,10 @@ static int get_segment (CPUState *env, m
                         sdr, mask + 0x80);
                 for (curaddr = sdr; curaddr < (sdr + mask + 0x80);
                      curaddr += 16) {
-                    a0 = ldl_phys(curaddr);
-                    a1 = ldl_phys(curaddr + 4);
-                    a2 = ldl_phys(curaddr + 8);
-                    a3 = ldl_phys(curaddr + 12);
+                    a0 = ldul_phys(curaddr);
+                    a1 = ldul_phys(curaddr + 4);
+                    a2 = ldul_phys(curaddr + 8);
+                    a3 = ldul_phys(curaddr + 12);
                     if (a0 != 0 || a1 != 0 || a2 != 0 || a3 != 0) {
                         fprintf(logfile,
                                 PADDRX ": %08x %08x %08x %08x\n",
@@ -2219,7 +2219,7 @@ static always_inline void powerpc_excp (
 #endif
         /* XXX: this is false */
         /* Get rS/rD and rA from faulting opcode */
-        env->spr[SPR_DSISR] |= (ldl_code((env->nip - 4)) & 0x03FF0000) >> 16;
+        env->spr[SPR_DSISR] |= (ldul_code((env->nip - 4)) & 0x03FF0000) >> 16;
         goto store_current;
     case POWERPC_EXCP_PROGRAM:   /* Program exception                        */
         switch (env->error_code & ~0xF) {
Index: target-ppc/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_helper.c,v
retrieving revision 1.49
diff -u -d -d -p -r1.49 op_helper.c
--- target-ppc/op_helper.c	7 Oct 2007 17:13:43 -0000	1.49
+++ target-ppc/op_helper.c	13 Oct 2007 14:09:58 -0000
@@ -2291,6 +2291,21 @@ DO_SPE_OP1(fsctuf);
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -2302,6 +2317,7 @@ DO_SPE_OP1(fsctuf);
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-ppc/op_helper.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_helper.h,v
retrieving revision 1.21
diff -u -d -d -p -r1.21 op_helper.h
--- target-ppc/op_helper.h	7 Oct 2007 17:13:44 -0000	1.21
+++ target-ppc/op_helper.h	13 Oct 2007 14:09:58 -0000
@@ -37,19 +37,6 @@ void glue(do_POWER2_lfq_le, MEMSUFFIX) (
 void glue(do_POWER2_stfq, MEMSUFFIX) (void);
 void glue(do_POWER2_stfq_le, MEMSUFFIX) (void);
 
-#if defined(TARGET_PPC64)
-void glue(do_lsw_64, MEMSUFFIX) (int dst);
-void glue(do_lsw_le_64, MEMSUFFIX) (int dst);
-void glue(do_stsw_64, MEMSUFFIX) (int src);
-void glue(do_stsw_le_64, MEMSUFFIX) (int src);
-void glue(do_lmw_64, MEMSUFFIX) (int dst);
-void glue(do_lmw_le_64, MEMSUFFIX) (int dst);
-void glue(do_stmw_64, MEMSUFFIX) (int src);
-void glue(do_stmw_le_64, MEMSUFFIX) (int src);
-void glue(do_icbi_64, MEMSUFFIX) (void);
-void glue(do_dcbz_64, MEMSUFFIX) (void);
-#endif
-
 #else
 
 void do_print_mem_EA (target_ulong EA);
Index: target-ppc/op_helper_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_helper_mem.h,v
retrieving revision 1.14
diff -u -d -d -p -r1.14 op_helper_mem.h
--- target-ppc/op_helper_mem.h	7 Oct 2007 17:13:44 -0000	1.14
+++ target-ppc/op_helper_mem.h	13 Oct 2007 14:09:58 -0000
@@ -19,85 +19,33 @@
  */
 
 /* Multiple word / string load and store */
-static always_inline target_ulong glue(ld32r, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF000000UL) >> 24) | ((tmp & 0x00FF0000UL) >> 8) |
-        ((tmp & 0x0000FF00UL) << 8) | ((tmp & 0x000000FFUL) << 24);
-}
-
-static always_inline void glue(st32r, MEMSUFFIX) (target_ulong EA,
-                                                  target_ulong data)
-{
-    uint32_t tmp =
-        ((data & 0xFF000000UL) >> 24) | ((data & 0x00FF0000UL) >> 8) |
-        ((data & 0x0000FF00UL) << 8) | ((data & 0x000000FFUL) << 24);
-    glue(stl, MEMSUFFIX)(EA, tmp);
-}
-
 void glue(do_lmw, MEMSUFFIX) (int dst)
 {
     for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lmw_64, MEMSUFFIX) (int dst)
-{
-    for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst] = glue(ldul, MEMSUFFIX)(T0);
     }
 }
-#endif
 
 void glue(do_stmw, MEMSUFFIX) (int src)
 {
     for (; src < 32; src++, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint32_t)T0, env->gpr[src]);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stmw_64, MEMSUFFIX) (int src)
-{
-    for (; src < 32; src++, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint64_t)T0, env->gpr[src]);
+        glue(stl, MEMSUFFIX)(T0, env->gpr[src]);
     }
 }
-#endif
 
 void glue(do_lmw_le, MEMSUFFIX) (int dst)
 {
     for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lmw_le_64, MEMSUFFIX) (int dst)
-{
-    for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst] = glue(ldulr, MEMSUFFIX)(T0);
     }
 }
-#endif
 
 void glue(do_stmw_le, MEMSUFFIX) (int src)
 {
     for (; src < 32; src++, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint32_t)T0, env->gpr[src]);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stmw_le_64, MEMSUFFIX) (int src)
-{
-    for (; src < 32; src++, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint64_t)T0, env->gpr[src]);
+        glue(stlr, MEMSUFFIX)(T0, env->gpr[src]);
     }
 }
-#endif
 
 void glue(do_lsw, MEMSUFFIX) (int dst)
 {
@@ -105,71 +53,33 @@ void glue(do_lsw, MEMSUFFIX) (int dst)
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-        if (unlikely(dst == 32))
-            dst = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        tmp = 0;
-        for (sh = 24; T1 > 0; T1--, T0++, sh -= 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint32_t)T0) << sh;
-        }
-        env->gpr[dst] = tmp;
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lsw_64, MEMSUFFIX) (int dst)
-{
-    uint32_t tmp;
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst++] = glue(ldul, MEMSUFFIX)(T0);
         if (unlikely(dst == 32))
             dst = 0;
     }
     if (unlikely(T1 != 0)) {
         tmp = 0;
         for (sh = 24; T1 > 0; T1--, T0++, sh -= 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint64_t)T0) << sh;
+            tmp |= glue(ldub, MEMSUFFIX)(T0) << sh;
         }
         env->gpr[dst] = tmp;
     }
 }
-#endif
 
 void glue(do_stsw, MEMSUFFIX) (int src)
 {
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint32_t)T0, env->gpr[src++]);
-        if (unlikely(src == 32))
-            src = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        for (sh = 24; T1 > 0; T1--, T0++, sh -= 8)
-            glue(stb, MEMSUFFIX)((uint32_t)T0, (env->gpr[src] >> sh) & 0xFF);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stsw_64, MEMSUFFIX) (int src)
-{
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint64_t)T0, env->gpr[src++]);
+        glue(stl, MEMSUFFIX)(T0, env->gpr[src++]);
         if (unlikely(src == 32))
             src = 0;
     }
     if (unlikely(T1 != 0)) {
         for (sh = 24; T1 > 0; T1--, T0++, sh -= 8)
-            glue(stb, MEMSUFFIX)((uint64_t)T0, (env->gpr[src] >> sh) & 0xFF);
+            glue(stb, MEMSUFFIX)(T0, (env->gpr[src] >> sh) & 0xFF);
     }
 }
-#endif
 
 void glue(do_lsw_le, MEMSUFFIX) (int dst)
 {
@@ -177,71 +87,33 @@ void glue(do_lsw_le, MEMSUFFIX) (int dst
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
-        if (unlikely(dst == 32))
-            dst = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        tmp = 0;
-        for (sh = 0; T1 > 0; T1--, T0++, sh += 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint32_t)T0) << sh;
-        }
-        env->gpr[dst] = tmp;
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lsw_le_64, MEMSUFFIX) (int dst)
-{
-    uint32_t tmp;
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst++] = glue(ldulr, MEMSUFFIX)(T0);
         if (unlikely(dst == 32))
             dst = 0;
     }
     if (unlikely(T1 != 0)) {
         tmp = 0;
         for (sh = 0; T1 > 0; T1--, T0++, sh += 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint64_t)T0) << sh;
+            tmp |= glue(ldub, MEMSUFFIX)(T0) << sh;
         }
         env->gpr[dst] = tmp;
     }
 }
-#endif
 
 void glue(do_stsw_le, MEMSUFFIX) (int src)
 {
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint32_t)T0, env->gpr[src++]);
-        if (unlikely(src == 32))
-            src = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        for (sh = 0; T1 > 0; T1--, T0++, sh += 8)
-            glue(stb, MEMSUFFIX)((uint32_t)T0, (env->gpr[src] >> sh) & 0xFF);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stsw_le_64, MEMSUFFIX) (int src)
-{
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint64_t)T0, env->gpr[src++]);
+        glue(stlr, MEMSUFFIX)(T0, env->gpr[src++]);
         if (unlikely(src == 32))
             src = 0;
     }
     if (unlikely(T1 != 0)) {
         for (sh = 0; T1 > 0; T1--, T0++, sh += 8)
-            glue(stb, MEMSUFFIX)((uint64_t)T0, (env->gpr[src] >> sh) & 0xFF);
+            glue(stb, MEMSUFFIX)(T0, (env->gpr[src] >> sh) & 0xFF);
     }
 }
-#endif
 
 /* Instruction cache invalidation helper */
 void glue(do_icbi, MEMSUFFIX) (void)
@@ -252,27 +124,11 @@ void glue(do_icbi, MEMSUFFIX) (void)
      * (not a fetch) by the MMU. To be sure it will be so,
      * do the load "by hand".
      */
-    tmp = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-    T0 &= ~(env->icache_line_size - 1);
-    tb_invalidate_page_range((uint32_t)T0,
-                             (uint32_t)(T0 + env->icache_line_size));
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_icbi_64, MEMSUFFIX) (void)
-{
-    uint64_t tmp;
-    /* Invalidate one cache line :
-     * PowerPC specification says this is to be treated like a load
-     * (not a fetch) by the MMU. To be sure it will be so,
-     * do the load "by hand".
-     */
-    tmp = glue(ldq, MEMSUFFIX)((uint64_t)T0);
+    tmp = glue(ldul, MEMSUFFIX)(T0);
     T0 &= ~(env->icache_line_size - 1);
-    tb_invalidate_page_range((uint64_t)T0,
-                             (uint64_t)(T0 + env->icache_line_size));
+    /* We assume it would not wrap around 2^32 on 32-bit targets */
+    tb_invalidate_page_range(T0, T0 + env->icache_line_size);
 }
-#endif
 
 void glue(do_dcbz, MEMSUFFIX) (void)
 {
@@ -281,90 +137,43 @@ void glue(do_dcbz, MEMSUFFIX) (void)
     /* XXX: should be 970 specific (?) */
     if (((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1)
         dcache_line_size = 32;
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x00), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x04), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x08), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x0C), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x10), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x14), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x18), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x1C), 0);
-    if (dcache_line_size >= 64) {
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x20UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x24UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x28UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x2CUL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x30UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x34UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x38UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x3CUL), 0);
-        if (dcache_line_size >= 128) {
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x40UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x44UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x48UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x4CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x50UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x54UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x58UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x5CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x60UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x64UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x68UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x6CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x70UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x74UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x78UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x7CUL), 0);
-        }
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_dcbz_64, MEMSUFFIX) (void)
-{
-    int dcache_line_size = env->dcache_line_size;
-
-    /* XXX: should be 970 specific (?) */
-    if (((env->spr[SPR_970_HID5] >> 6) & 0x3) == 0x2)
-        dcache_line_size = 32;
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x00), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x04), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x08), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x0C), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x10), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x14), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x18), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x1C), 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x00, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x04, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x08, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x0C, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x10, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x14, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x18, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x1C, 0);
     if (dcache_line_size >= 64) {
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x20UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x24UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x28UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x2CUL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x30UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x34UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x38UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x3CUL), 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x20UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x24UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x28UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x2CUL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x30UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x34UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x38UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x3CUL, 0);
         if (dcache_line_size >= 128) {
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x40UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x44UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x48UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x4CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x50UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x54UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x58UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x5CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x60UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x64UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x68UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x6CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x70UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x74UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x78UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x7CUL), 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x40UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x44UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x48UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x4CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x50UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x54UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x58UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x5CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x60UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x64UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x68UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x6CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x70UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x74UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x78UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x7CUL, 0);
         }
     }
 }
-#endif
 
 /* PowerPC 601 specific instructions (POWER bridge) */
 // XXX: to be tested
@@ -400,26 +209,6 @@ void glue(do_POWER2_lfq, MEMSUFFIX) (voi
     FT1 = glue(ldfq, MEMSUFFIX)((uint32_t)(T0 + 4));
 }
 
-static always_inline double glue(ldfqr, MEMSUFFIX) (target_ulong EA)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = glue(ldfq, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-
-    return u.d;
-}
-
 void glue(do_POWER2_lfq_le, MEMSUFFIX) (void)
 {
     FT0 = glue(ldfqr, MEMSUFFIX)((uint32_t)(T0 + 4));
@@ -432,25 +221,6 @@ void glue(do_POWER2_stfq, MEMSUFFIX) (vo
     glue(stfq, MEMSUFFIX)((uint32_t)(T0 + 4), FT1);
 }
 
-static always_inline void glue(stfqr, MEMSUFFIX) (target_ulong EA, double d)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = d;
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-    glue(stfq, MEMSUFFIX)(EA, u.d);
-}
-
 void glue(do_POWER2_stfq_le, MEMSUFFIX) (void)
 {
     glue(stfqr, MEMSUFFIX)((uint32_t)(T0 + 4), FT0);
Index: target-ppc/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_mem.h,v
retrieving revision 1.22
diff -u -d -d -p -r1.22 op_mem.h
--- target-ppc/op_mem.h	7 Oct 2007 18:19:25 -0000	1.22
+++ target-ppc/op_mem.h	13 Oct 2007 14:09:58 -0000
@@ -18,85 +18,6 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-static always_inline uint16_t glue(ld16r, MEMSUFFIX) (target_ulong EA)
-{
-    uint16_t tmp = glue(lduw, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF00) >> 8) | ((tmp & 0x00FF) << 8);
-}
-
-static always_inline int32_t glue(ld16rs, MEMSUFFIX) (target_ulong EA)
-{
-    int16_t tmp = glue(lduw, MEMSUFFIX)(EA);
-    return (int16_t)((tmp & 0xFF00) >> 8) | ((tmp & 0x00FF) << 8);
-}
-
-static always_inline uint32_t glue(ld32r, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF000000) >> 24) | ((tmp & 0x00FF0000) >> 8) |
-        ((tmp & 0x0000FF00) << 8) | ((tmp & 0x000000FF) << 24);
-}
-
-#if defined(TARGET_PPC64) || defined(TARGET_PPCEMB)
-static always_inline uint64_t glue(ld64r, MEMSUFFIX) (target_ulong EA)
-{
-    uint64_t tmp = glue(ldq, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF00000000000000ULL) >> 56) |
-        ((tmp & 0x00FF000000000000ULL) >> 40) |
-        ((tmp & 0x0000FF0000000000ULL) >> 24) |
-        ((tmp & 0x000000FF00000000ULL) >> 8) |
-        ((tmp & 0x00000000FF000000ULL) << 8) |
-        ((tmp & 0x0000000000FF0000ULL) << 24) |
-        ((tmp & 0x000000000000FF00ULL) << 40) |
-        ((tmp & 0x00000000000000FFULL) << 54);
-}
-#endif
-
-#if defined(TARGET_PPC64)
-static always_inline int64_t glue(ldsl, MEMSUFFIX) (target_ulong EA)
-{
-    return (int32_t)glue(ldl, MEMSUFFIX)(EA);
-}
-
-static always_inline int64_t glue(ld32rs, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return (int32_t)((tmp & 0xFF000000) >> 24) | ((tmp & 0x00FF0000) >> 8) |
-        ((tmp & 0x0000FF00) << 8) | ((tmp & 0x000000FF) << 24);
-}
-#endif
-
-static always_inline void glue(st16r, MEMSUFFIX) (target_ulong EA,
-                                                  uint16_t data)
-{
-    uint16_t tmp = ((data & 0xFF00) >> 8) | ((data & 0x00FF) << 8);
-    glue(stw, MEMSUFFIX)(EA, tmp);
-}
-
-static always_inline void glue(st32r, MEMSUFFIX) (target_ulong EA,
-                                                  uint32_t data)
-{
-    uint32_t tmp = ((data & 0xFF000000) >> 24) | ((data & 0x00FF0000) >> 8) |
-        ((data & 0x0000FF00) << 8) | ((data & 0x000000FF) << 24);
-    glue(stl, MEMSUFFIX)(EA, tmp);
-}
-
-#if defined(TARGET_PPC64) || defined(TARGET_PPCEMB)
-static always_inline void glue(st64r, MEMSUFFIX) (target_ulong EA,
-                                                  uint64_t data)
-{
-    uint64_t tmp = ((data & 0xFF00000000000000ULL) >> 56) |
-        ((data & 0x00FF000000000000ULL) >> 40) |
-        ((data & 0x0000FF0000000000ULL) >> 24) |
-        ((data & 0x000000FF00000000ULL) >> 8) |
-        ((data & 0x00000000FF000000ULL) << 8) |
-        ((data & 0x0000000000FF0000ULL) << 24) |
-        ((data & 0x000000000000FF00ULL) << 40) |
-        ((data & 0x00000000000000FFULL) << 56);
-    glue(stq, MEMSUFFIX)(EA, tmp);
-}
-#endif
-
 /***                             Integer load                              ***/
 #define PPC_LD_OP(name, op)                                                   \
 void OPPROTO glue(glue(op_l, name), MEMSUFFIX) (void)                         \
@@ -130,10 +51,11 @@ void OPPROTO glue(glue(glue(op_st, name)
 }
 #endif
 
+/* Native-endian fixed-point memory loads                                    */
 PPC_LD_OP(bz, ldub);
 PPC_LD_OP(ha, ldsw);
 PPC_LD_OP(hz, lduw);
-PPC_LD_OP(wz, ldl);
+PPC_LD_OP(wz, ldul);
 #if defined(TARGET_PPC64)
 PPC_LD_OP(d, ldq);
 PPC_LD_OP(wa, ldsl);
@@ -142,23 +64,24 @@ PPC_LD_OP_64(wa, ldsl);
 PPC_LD_OP_64(bz, ldub);
 PPC_LD_OP_64(ha, ldsw);
 PPC_LD_OP_64(hz, lduw);
-PPC_LD_OP_64(wz, ldl);
+PPC_LD_OP_64(wz, ldul);
 #endif
 
-PPC_LD_OP(ha_le, ld16rs);
-PPC_LD_OP(hz_le, ld16r);
-PPC_LD_OP(wz_le, ld32r);
+/* Reverse-endian fixed-point memory loads                                   */
+PPC_LD_OP(ha_le, ldswr);
+PPC_LD_OP(hz_le, lduwr);
+PPC_LD_OP(wz_le, ldulr);
 #if defined(TARGET_PPC64)
-PPC_LD_OP(d_le, ld64r);
-PPC_LD_OP(wa_le, ld32rs);
-PPC_LD_OP_64(d_le, ld64r);
-PPC_LD_OP_64(wa_le, ld32rs);
-PPC_LD_OP_64(ha_le, ld16rs);
-PPC_LD_OP_64(hz_le, ld16r);
-PPC_LD_OP_64(wz_le, ld32r);
+PPC_LD_OP(d_le, ldqr);
+PPC_LD_OP(wa_le, ldslr);
+PPC_LD_OP_64(d_le, ldqr);
+PPC_LD_OP_64(wa_le, ldslr);
+PPC_LD_OP_64(ha_le, ldswr);
+PPC_LD_OP_64(hz_le, lduwr);
+PPC_LD_OP_64(wz_le, ldulr);
 #endif
 
-/***                              Integer store                            ***/
+/* Native-endian fixed-point memory stores                                   */
 PPC_ST_OP(b, stb);
 PPC_ST_OP(h, stw);
 PPC_ST_OP(w, stl);
@@ -170,120 +93,110 @@ PPC_ST_OP_64(h, stw);
 PPC_ST_OP_64(w, stl);
 #endif
 
-PPC_ST_OP(h_le, st16r);
-PPC_ST_OP(w_le, st32r);
+/* Reverse-endian fixed-point memory stores                                  */
+PPC_ST_OP(h_le, stwr);
+PPC_ST_OP(w_le, stlr);
 #if defined(TARGET_PPC64)
-PPC_ST_OP(d_le, st64r);
-PPC_ST_OP_64(d_le, st64r);
-PPC_ST_OP_64(h_le, st16r);
-PPC_ST_OP_64(w_le, st32r);
+PPC_ST_OP(d_le, stqr);
+PPC_ST_OP_64(d_le, stqr);
+PPC_ST_OP_64(h_le, stwr);
+PPC_ST_OP_64(w_le, stlr);
 #endif
 
-/***                Integer load and store with byte reverse               ***/
-PPC_LD_OP(hbr, ld16r);
-PPC_LD_OP(wbr, ld32r);
-PPC_ST_OP(hbr, st16r);
-PPC_ST_OP(wbr, st32r);
+/* Native-endian fixed-point loads and stores with byte-reverse              */
+PPC_LD_OP(hbr, lduwr);
+PPC_LD_OP(wbr, ldulr);
+PPC_ST_OP(hbr, stwr);
+PPC_ST_OP(wbr, stlr);
 #if defined(TARGET_PPC64)
-PPC_LD_OP_64(hbr, ld16r);
-PPC_LD_OP_64(wbr, ld32r);
-PPC_ST_OP_64(hbr, st16r);
-PPC_ST_OP_64(wbr, st32r);
+PPC_LD_OP_64(hbr, lduwr);
+PPC_LD_OP_64(wbr, ldulr);
+PPC_ST_OP_64(hbr, stwr);
+PPC_ST_OP_64(wbr, stlr);
 #endif
 
+/* Reverse-endian fixed-point loads and stores with byte-reverse             */
 PPC_LD_OP(hbr_le, lduw);
-PPC_LD_OP(wbr_le, ldl);
+PPC_LD_OP(wbr_le, ldul);
 PPC_ST_OP(hbr_le, stw);
 PPC_ST_OP(wbr_le, stl);
 #if defined(TARGET_PPC64)
 PPC_LD_OP_64(hbr_le, lduw);
-PPC_LD_OP_64(wbr_le, ldl);
+PPC_LD_OP_64(wbr_le, ldul);
 PPC_ST_OP_64(hbr_le, stw);
 PPC_ST_OP_64(wbr_le, stl);
 #endif
 
-/***                    Integer load and store multiple                    ***/
+/* Native-endian fixed-point loads and stores multiple                       */
 void OPPROTO glue(op_lmw, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_lmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_lmw_64, MEMSUFFIX) (void)
-{
-    glue(do_lmw_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
-void OPPROTO glue(op_lmw_le, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw, MEMSUFFIX) (void)
 {
-    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_stmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_lmw_le_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_lmw_64, MEMSUFFIX) (void)
 {
-    glue(do_lmw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_lmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
-#endif
 
-void OPPROTO glue(op_stmw, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw_64, MEMSUFFIX) (void)
 {
     glue(do_stmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
+#endif
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_stmw_64, MEMSUFFIX) (void)
+/* Reverse-endian fixed-point loads and stores multiple                      */
+void OPPROTO glue(op_lmw_le, MEMSUFFIX) (void)
 {
-    glue(do_stmw_64, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
-#endif
 
 void OPPROTO glue(op_stmw_le, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_stmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_stmw_le_64, MEMSUFFIX) (void)
-{
-    glue(do_stmw_le_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
-/***                    Integer load and store strings                     ***/
-void OPPROTO glue(op_lswi, MEMSUFFIX) (void)
+void OPPROTO glue(op_lmw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw, MEMSUFFIX)(PARAM1);
+    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_lswi_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw_64, MEMSUFFIX)(PARAM1);
+    glue(do_stmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
 
-void OPPROTO glue(op_lswi_le, MEMSUFFIX) (void)
+/* Native-endian loads and stores string                                     */
+void OPPROTO glue(op_lswi, MEMSUFFIX) (void)
 {
-    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_lsw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_lswi_le_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_lswi_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_lsw, MEMSUFFIX)(PARAM1);
     RETURN();
     RETURN();
 }
 #endif
@@ -303,6 +216,7 @@ void OPPROTO glue(op_lswx, MEMSUFFIX) (v
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
+            T0 = (uint32_t)T0;
             glue(do_lsw, MEMSUFFIX)(PARAM1);
         }
     }
@@ -320,13 +234,44 @@ void OPPROTO glue(op_lswx_64, MEMSUFFIX)
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
-            glue(do_lsw_64, MEMSUFFIX)(PARAM1);
+            glue(do_lsw, MEMSUFFIX)(PARAM1);
         }
     }
     RETURN();
 }
 #endif
 
+void OPPROTO glue(op_stsw, MEMSUFFIX) (void)
+{
+    T0 = (uint32_t)T0;
+    glue(do_stsw, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+
+#if defined(TARGET_PPC64)
+void OPPROTO glue(op_stsw_64, MEMSUFFIX) (void)
+{
+    glue(do_stsw, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+#endif
+
+/* Reverse-endian loads and stores string                                    */
+void OPPROTO glue(op_lswi_le, MEMSUFFIX) (void)
+{
+    T0 = (uint32_t)T0;
+    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+
+#if defined(TARGET_PPC64)
+void OPPROTO glue(op_lswi_le_64, MEMSUFFIX) (void)
+{
+    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+#endif
+
 void OPPROTO glue(op_lswx_le, MEMSUFFIX) (void)
 {
     /* Note: T1 comes from xer_bc then no cast is needed */
@@ -337,6 +282,7 @@ void OPPROTO glue(op_lswx_le, MEMSUFFIX)
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
+            T0 = (uint32_t)T0;
             glue(do_lsw_le, MEMSUFFIX)(PARAM1);
         }
     }
@@ -354,29 +300,16 @@ void OPPROTO glue(op_lswx_le_64, MEMSUFF
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
-            glue(do_lsw_le_64, MEMSUFFIX)(PARAM1);
+            glue(do_lsw_le, MEMSUFFIX)(PARAM1);
         }
     }
     RETURN();
 }
 #endif
 
-void OPPROTO glue(op_stsw, MEMSUFFIX) (void)
-{
-    glue(do_stsw, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_stsw_64, MEMSUFFIX) (void)
-{
-    glue(do_stsw_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
 void OPPROTO glue(op_stsw_le, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_stsw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
@@ -384,7 +317,7 @@ void OPPROTO glue(op_stsw_le, MEMSUFFIX)
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_stsw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_stsw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_stsw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
@@ -432,38 +365,9 @@ PPC_STF_OP_64(fs, stfs);
 PPC_STF_OP_64(fiwx, stfiwx);
 #endif
 
-static always_inline void glue(stfqr, MEMSUFFIX) (target_ulong EA, double d)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = d;
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-    glue(stfq, MEMSUFFIX)(EA, u.d);
-}
-
 static always_inline void glue(stfsr, MEMSUFFIX) (target_ulong EA, double d)
 {
-    union {
-        float f;
-        uint32_t u;
-    } u;
-
-    u.f = float64_to_float32(d, &env->fp_status);
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-    glue(stfl, MEMSUFFIX)(EA, u.f);
+    glue(stflr, MEMSUFFIX)(EA, float64_to_float32(d, &env->fp_status));
 }
 
 static always_inline void glue(stfiwxr, MEMSUFFIX) (target_ulong EA, double d)
@@ -475,11 +379,7 @@ static always_inline void glue(stfiwxr, 
 
     /* Store the low order 32 bits without any conversion */
     u.d = d;
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-    glue(stl, MEMSUFFIX)(EA, u.u);
+    glue(stlr, MEMSUFFIX)(EA, u.u);
 }
 
 PPC_STF_OP(fd_le, stfqr);
@@ -520,40 +420,9 @@ PPC_LDF_OP_64(fd, ldfq);
 PPC_LDF_OP_64(fs, ldfs);
 #endif
 
-static always_inline double glue(ldfqr, MEMSUFFIX) (target_ulong EA)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = glue(ldfq, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-
-    return u.d;
-}
-
 static always_inline double glue(ldfsr, MEMSUFFIX) (target_ulong EA)
 {
-    union {
-        float f;
-        uint32_t u;
-    } u;
-
-    u.f = glue(ldfl, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-
-    return float32_to_float64(u.f, &env->fp_status);
+    return float32_to_float64(glue(ldflr, MEMSUFFIX)(EA), &env->fp_status);
 }
 
 PPC_LDF_OP(fd_le, ldfqr);
@@ -569,7 +438,7 @@ void OPPROTO glue(op_lwarx, MEMSUFFIX) (
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ldl, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldul, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -581,7 +450,7 @@ void OPPROTO glue(op_lwarx_64, MEMSUFFIX
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldul, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -615,7 +484,7 @@ void OPPROTO glue(op_lwarx_le, MEMSUFFIX
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldulr, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -627,7 +496,7 @@ void OPPROTO glue(op_lwarx_le_64, MEMSUF
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldulr, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -638,7 +507,7 @@ void OPPROTO glue(op_ldarx_le, MEMSUFFIX
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld64r, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldqr, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -649,7 +518,7 @@ void OPPROTO glue(op_ldarx_le_64, MEMSUF
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld64r, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldqr, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -731,7 +600,7 @@ void OPPROTO glue(op_stwcx_le, MEMSUFFIX
         if (unlikely(env->reserve != (uint32_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st32r, MEMSUFFIX)((uint32_t)T0, T1);
+            glue(stlr, MEMSUFFIX)((uint32_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -748,7 +617,7 @@ void OPPROTO glue(op_stwcx_le_64, MEMSUF
         if (unlikely(env->reserve != (uint64_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st32r, MEMSUFFIX)((uint64_t)T0, T1);
+            glue(stlr, MEMSUFFIX)((uint64_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -764,7 +633,7 @@ void OPPROTO glue(op_stdcx_le, MEMSUFFIX
         if (unlikely(env->reserve != (uint32_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st64r, MEMSUFFIX)((uint32_t)T0, T1);
+            glue(stqr, MEMSUFFIX)((uint32_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -780,7 +649,7 @@ void OPPROTO glue(op_stdcx_le_64, MEMSUF
         if (unlikely(env->reserve != (uint64_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st64r, MEMSUFFIX)((uint64_t)T0, T1);
+            glue(stqr, MEMSUFFIX)((uint64_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -862,6 +731,7 @@ void OPPROTO glue(op_dcbz_l128, MEMSUFFI
 
 void OPPROTO glue(op_dcbz, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_dcbz, MEMSUFFIX)();
     RETURN();
 }
@@ -940,7 +810,7 @@ void OPPROTO glue(op_dcbz_l128_64, MEMSU
 
 void OPPROTO glue(op_dcbz_64, MEMSUFFIX) (void)
 {
-    glue(do_dcbz_64, MEMSUFFIX)();
+    glue(do_dcbz, MEMSUFFIX)();
     RETURN();
 }
 #endif
@@ -948,6 +818,7 @@ void OPPROTO glue(op_dcbz_64, MEMSUFFIX)
 /* Instruction cache block invalidate */
 void OPPROTO glue(op_icbi, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_icbi, MEMSUFFIX)();
     RETURN();
 }
@@ -955,7 +826,7 @@ void OPPROTO glue(op_icbi, MEMSUFFIX) (v
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_icbi_64, MEMSUFFIX) (void)
 {
-    glue(do_icbi_64, MEMSUFFIX)();
+    glue(do_icbi, MEMSUFFIX)();
     RETURN();
 }
 #endif
@@ -963,14 +834,14 @@ void OPPROTO glue(op_icbi_64, MEMSUFFIX)
 /* External access */
 void OPPROTO glue(op_eciwx, MEMSUFFIX) (void)
 {
-    T1 = glue(ldl, MEMSUFFIX)((uint32_t)T0);
+    T1 = glue(ldul, MEMSUFFIX)((uint32_t)T0);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_eciwx_64, MEMSUFFIX) (void)
 {
-    T1 = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+    T1 = glue(ldul, MEMSUFFIX)((uint64_t)T0);
     RETURN();
 }
 #endif
@@ -991,28 +862,28 @@ void OPPROTO glue(op_ecowx_64, MEMSUFFIX
 
 void OPPROTO glue(op_eciwx_le, MEMSUFFIX) (void)
 {
-    T1 = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
+    T1 = glue(ldulr, MEMSUFFIX)((uint32_t)T0);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_eciwx_le_64, MEMSUFFIX) (void)
 {
-    T1 = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+    T1 = glue(ldulr, MEMSUFFIX)((uint64_t)T0);
     RETURN();
 }
 #endif
 
 void OPPROTO glue(op_ecowx_le, MEMSUFFIX) (void)
 {
-    glue(st32r, MEMSUFFIX)((uint32_t)T0, T1);
+    glue(stlr, MEMSUFFIX)((uint32_t)T0, T1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_ecowx_le_64, MEMSUFFIX) (void)
 {
-    glue(st32r, MEMSUFFIX)((uint64_t)T0, T1);
+    glue(stlr, MEMSUFFIX)((uint64_t)T0, T1);
     RETURN();
 }
 #endif
@@ -1070,8 +941,8 @@ void OPPROTO glue(op_vr_lvx, MEMSUFFIX) 
 
 void OPPROTO glue(op_vr_lvx_le, MEMSUFFIX) (void)
 {
-    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint32_t)T0);
-    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint32_t)T0 + 8);
+    AVR0.u64[VR_DWORD1] = glue(ldqr, MEMSUFFIX)((uint32_t)T0);
+    AVR0.u64[VR_DWORD0] = glue(ldqr, MEMSUFFIX)((uint32_t)T0 + 8);
 }
 
 void OPPROTO glue(op_vr_stvx, MEMSUFFIX) (void)
@@ -1082,8 +953,8 @@ void OPPROTO glue(op_vr_stvx, MEMSUFFIX)
 
 void OPPROTO glue(op_vr_stvx_le, MEMSUFFIX) (void)
 {
-    glue(stq, MEMSUFFIX)((uint32_t)T0, AVR0.u64[VR_DWORD1]);
-    glue(stq, MEMSUFFIX)((uint32_t)T0 + 8, AVR0.u64[VR_DWORD0]);
+    glue(stqr, MEMSUFFIX)((uint32_t)T0, AVR0.u64[VR_DWORD1]);
+    glue(stqr, MEMSUFFIX)((uint32_t)T0 + 8, AVR0.u64[VR_DWORD0]);
 }
 
 #if defined(TARGET_PPC64)
@@ -1095,8 +966,8 @@ void OPPROTO glue(op_vr_lvx_64, MEMSUFFI
 
 void OPPROTO glue(op_vr_lvx_le_64, MEMSUFFIX) (void)
 {
-    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint64_t)T0);
-    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint64_t)T0 + 8);
+    AVR0.u64[VR_DWORD1] = glue(ldqr, MEMSUFFIX)((uint64_t)T0);
+    AVR0.u64[VR_DWORD0] = glue(ldqr, MEMSUFFIX)((uint64_t)T0 + 8);
 }
 
 void OPPROTO glue(op_vr_stvx_64, MEMSUFFIX) (void)
@@ -1107,8 +978,8 @@ void OPPROTO glue(op_vr_stvx_64, MEMSUFF
 
 void OPPROTO glue(op_vr_stvx_le_64, MEMSUFFIX) (void)
 {
-    glue(stq, MEMSUFFIX)((uint64_t)T0, AVR0.u64[VR_DWORD1]);
-    glue(stq, MEMSUFFIX)((uint64_t)T0 + 8, AVR0.u64[VR_DWORD0]);
+    glue(stqr, MEMSUFFIX)((uint64_t)T0, AVR0.u64[VR_DWORD1]);
+    glue(stqr, MEMSUFFIX)((uint64_t)T0 + 8, AVR0.u64[VR_DWORD0]);
 }
 #endif
 #undef VR_DWORD0
@@ -1163,14 +1034,14 @@ _PPC_SPE_ST_OP(name, op)
 #if !defined(TARGET_PPC64)
 PPC_SPE_LD_OP(dd, ldq);
 PPC_SPE_ST_OP(dd, stq);
-PPC_SPE_LD_OP(dd_le, ld64r);
-PPC_SPE_ST_OP(dd_le, st64r);
+PPC_SPE_LD_OP(dd_le, ldqr);
+PPC_SPE_ST_OP(dd_le, stqr);
 #endif
 static always_inline uint64_t glue(spe_ldw, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ldl, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ldl, MEMSUFFIX)(EA + 4);
+    ret = (uint64_t)glue(ldul, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(ldul, MEMSUFFIX)(EA + 4);
     return ret;
 }
 PPC_SPE_LD_OP(dw, spe_ldw);
@@ -1184,16 +1055,16 @@ PPC_SPE_ST_OP(dw, spe_stdw);
 static always_inline uint64_t glue(spe_ldw_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld32r, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ld32r, MEMSUFFIX)(EA + 4);
+    ret = (uint64_t)glue(ldulr, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(ldulr, MEMSUFFIX)(EA + 4);
     return ret;
 }
 PPC_SPE_LD_OP(dw_le, spe_ldw_le);
 static always_inline void glue(spe_stdw_le, MEMSUFFIX) (target_ulong EA,
                                                         uint64_t data)
 {
-    glue(st32r, MEMSUFFIX)(EA, data >> 32);
-    glue(st32r, MEMSUFFIX)(EA + 4, data);
+    glue(stlr, MEMSUFFIX)(EA, data >> 32);
+    glue(stlr, MEMSUFFIX)(EA + 4, data);
 }
 PPC_SPE_ST_OP(dw_le, spe_stdw_le);
 static always_inline uint64_t glue(spe_ldh, MEMSUFFIX) (target_ulong EA)
@@ -1218,20 +1089,20 @@ PPC_SPE_ST_OP(dh, spe_stdh);
 static always_inline uint64_t glue(spe_ldh_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 48;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2) << 32;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 4) << 16;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 6);
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 48;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2) << 32;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 4) << 16;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 6);
     return ret;
 }
 PPC_SPE_LD_OP(dh_le, spe_ldh_le);
 static always_inline void glue(spe_stdh_le, MEMSUFFIX) (target_ulong EA,
                                                         uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 48);
-    glue(st16r, MEMSUFFIX)(EA + 2, data >> 32);
-    glue(st16r, MEMSUFFIX)(EA + 4, data >> 16);
-    glue(st16r, MEMSUFFIX)(EA + 6, data);
+    glue(stwr, MEMSUFFIX)(EA, data >> 48);
+    glue(stwr, MEMSUFFIX)(EA + 2, data >> 32);
+    glue(stwr, MEMSUFFIX)(EA + 4, data >> 16);
+    glue(stwr, MEMSUFFIX)(EA + 6, data);
 }
 PPC_SPE_ST_OP(dh_le, spe_stdh_le);
 static always_inline uint64_t glue(spe_lwhe, MEMSUFFIX) (target_ulong EA)
@@ -1252,16 +1123,16 @@ PPC_SPE_ST_OP(whe, spe_stwhe);
 static always_inline uint64_t glue(spe_lwhe_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 48;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2) << 16;
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 48;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2) << 16;
     return ret;
 }
 PPC_SPE_LD_OP(whe_le, spe_lwhe_le);
 static always_inline void glue(spe_stwhe_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 48);
-    glue(st16r, MEMSUFFIX)(EA + 2, data >> 16);
+    glue(stwr, MEMSUFFIX)(EA, data >> 48);
+    glue(stwr, MEMSUFFIX)(EA + 2, data >> 16);
 }
 PPC_SPE_ST_OP(whe_le, spe_stwhe_le);
 static always_inline uint64_t glue(spe_lwhou, MEMSUFFIX) (target_ulong EA)
@@ -1290,24 +1161,24 @@ PPC_SPE_ST_OP(who, spe_stwho);
 static always_inline uint64_t glue(spe_lwhou_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2);
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2);
     return ret;
 }
 PPC_SPE_LD_OP(whou_le, spe_lwhou_le);
 static always_inline uint64_t glue(spe_lwhos_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = ((uint64_t)((int32_t)glue(ld16rs, MEMSUFFIX)(EA))) << 32;
-    ret |= (uint64_t)((int32_t)glue(ld16rs, MEMSUFFIX)(EA + 2));
+    ret = ((uint64_t)((int32_t)glue(ldswr, MEMSUFFIX)(EA))) << 32;
+    ret |= (uint64_t)((int32_t)glue(ldswr, MEMSUFFIX)(EA + 2));
     return ret;
 }
 PPC_SPE_LD_OP(whos_le, spe_lwhos_le);
 static always_inline void glue(spe_stwho_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 32);
-    glue(st16r, MEMSUFFIX)(EA + 2, data);
+    glue(stwr, MEMSUFFIX)(EA, data >> 32);
+    glue(stwr, MEMSUFFIX)(EA + 2, data);
 }
 PPC_SPE_ST_OP(who_le, spe_stwho_le);
 #if !defined(TARGET_PPC64)
@@ -1320,7 +1191,7 @@ PPC_SPE_ST_OP(wwo, spe_stwwo);
 static always_inline void glue(spe_stwwo_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st32r, MEMSUFFIX)(EA, data);
+    glue(stlr, MEMSUFFIX)(EA, data);
 }
 PPC_SPE_ST_OP(wwo_le, spe_stwwo_le);
 #endif
@@ -1334,14 +1205,14 @@ PPC_SPE_LD_OP(h, spe_lh);
 static always_inline uint64_t glue(spe_lh_le, MEMSUFFIX) (target_ulong EA)
 {
     uint16_t tmp;
-    tmp = glue(ld16r, MEMSUFFIX)(EA);
+    tmp = glue(lduwr, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 48) | ((uint64_t)tmp << 16);
 }
 PPC_SPE_LD_OP(h_le, spe_lh_le);
 static always_inline uint64_t glue(spe_lwwsplat, MEMSUFFIX) (target_ulong EA)
 {
     uint32_t tmp;
-    tmp = glue(ldl, MEMSUFFIX)(EA);
+    tmp = glue(ldul, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 32) | (uint64_t)tmp;
 }
 PPC_SPE_LD_OP(wwsplat, spe_lwwsplat);
@@ -1349,7 +1220,7 @@ static always_inline
 uint64_t glue(spe_lwwsplat_le, MEMSUFFIX) (target_ulong EA)
 {
     uint32_t tmp;
-    tmp = glue(ld32r, MEMSUFFIX)(EA);
+    tmp = glue(ldulr, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 32) | (uint64_t)tmp;
 }
 PPC_SPE_LD_OP(wwsplat_le, spe_lwwsplat_le);
@@ -1369,9 +1240,9 @@ uint64_t glue(spe_lwhsplat_le, MEMSUFFIX
 {
     uint64_t ret;
     uint16_t tmp;
-    tmp = glue(ld16r, MEMSUFFIX)(EA);
+    tmp = glue(lduwr, MEMSUFFIX)(EA);
     ret = ((uint64_t)tmp << 48) | ((uint64_t)tmp << 32);
-    tmp = glue(ld16r, MEMSUFFIX)(EA + 2);
+    tmp = glue(lduwr, MEMSUFFIX)(EA + 2);
     ret |= ((uint64_t)tmp << 16) | (uint64_t)tmp;
     return ret;
 }
Index: target-ppc/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/translate.c,v
retrieving revision 1.92
diff -u -d -d -p -r1.92 translate.c
--- target-ppc/translate.c	7 Oct 2007 23:10:08 -0000	1.92
+++ target-ppc/translate.c	13 Oct 2007 14:09:59 -0000
@@ -6763,7 +6763,7 @@ static always_inline int gen_intermediat
                     ctx.nip, 1 - msr_pr, msr_ir);
         }
 #endif
-        ctx.opcode = ldl_code(ctx.nip);
+        ctx.opcode = ldul_code(ctx.nip);
         if (msr_le) {
             ctx.opcode = ((ctx.opcode & 0xFF000000) >> 24) |
                 ((ctx.opcode & 0x00FF0000) >> 8) |
Index: target-sh4/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-sh4/exec.h,v
retrieving revision 1.5
diff -u -d -d -p -r1.5 exec.h
--- target-sh4/exec.h	16 Sep 2007 21:08:05 -0000	1.5
+++ target-sh4/exec.h	13 Oct 2007 14:09:59 -0000
@@ -48,6 +48,9 @@ static inline int cpu_halted(CPUState *e
 
 #ifndef CONFIG_USER_ONLY
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 #define RETURN() __asm__ __volatile__("")
Index: target-sh4/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-sh4/op_helper.c,v
retrieving revision 1.4
diff -u -d -d -p -r1.4 op_helper.c
--- target-sh4/op_helper.c	16 Sep 2007 21:08:05 -0000	1.4
+++ target-sh4/op_helper.c	13 Oct 2007 14:09:59 -0000
@@ -30,6 +30,21 @@ void do_raise_exception(void)
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -41,6 +56,7 @@ void do_raise_exception(void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 void tlb_fill(target_ulong addr, int is_write, int is_user, void *retaddr)
 {
Index: target-sh4/op_mem.c
===================================================================
RCS file: /sources/qemu/qemu/target-sh4/op_mem.c,v
retrieving revision 1.3
diff -u -d -d -p -r1.3 op_mem.c
--- target-sh4/op_mem.c	16 Sep 2007 21:08:05 -0000	1.3
+++ target-sh4/op_mem.c	13 Oct 2007 14:09:59 -0000
@@ -48,7 +48,7 @@ void glue(op_stw_T0_T1, MEMSUFFIX) (void
 }
 
 void glue(op_ldl_T0_T0, MEMSUFFIX) (void) {
-    T0 = glue(ldl, MEMSUFFIX) (T0);
+    T0 = glue(ldul, MEMSUFFIX) (T0);
     RETURN();
 }
 
Index: target-sparc/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/exec.h,v
retrieving revision 1.21
diff -u -d -d -p -r1.21 exec.h
--- target-sparc/exec.h	30 Sep 2007 19:38:11 -0000	1.21
+++ target-sparc/exec.h	13 Oct 2007 14:09:59 -0000
@@ -100,6 +100,9 @@ void do_rdpsr();
 /* XXX: move that to a generic header */
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 static inline void env_to_regs(void)
Index: target-sparc/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/helper.c,v
retrieving revision 1.27
diff -u -d -d -p -r1.27 helper.c
--- target-sparc/helper.c	24 Sep 2007 19:44:09 -0000	1.27
+++ target-sparc/helper.c	13 Oct 2007 14:09:59 -0000
@@ -129,7 +129,7 @@ int get_physical_address (CPUState *env,
     /* SPARC reference MMU table walk: Context table->L1->L2->PTE */
     /* Context base + context number */
     pde_ptr = (env->mmuregs[1] << 4) + (env->mmuregs[2] << 2);
-    pde = ldl_phys(pde_ptr);
+    pde = ldul_phys(pde_ptr);
 
     /* Ctx pde */
     switch (pde & PTE_ENTRYTYPE_MASK) {
@@ -141,7 +141,7 @@ int get_physical_address (CPUState *env,
         return 4 << 2;
     case 1: /* L0 PDE */
         pde_ptr = ((address >> 22) & ~3) + ((pde & ~3) << 4);
-        pde = ldl_phys(pde_ptr);
+        pde = ldul_phys(pde_ptr);
 
         switch (pde & PTE_ENTRYTYPE_MASK) {
         default:
@@ -151,7 +151,7 @@ int get_physical_address (CPUState *env,
             return (1 << 8) | (4 << 2);
         case 1: /* L1 PDE */
             pde_ptr = ((address & 0xfc0000) >> 16) + ((pde & ~3) << 4);
-            pde = ldl_phys(pde_ptr);
+            pde = ldul_phys(pde_ptr);
 
             switch (pde & PTE_ENTRYTYPE_MASK) {
             default:
@@ -161,7 +161,7 @@ int get_physical_address (CPUState *env,
                 return (2 << 8) | (4 << 2);
             case 1: /* L2 PDE */
                 pde_ptr = ((address & 0x3f000) >> 10) + ((pde & ~3) << 4);
-                pde = ldl_phys(pde_ptr);
+                pde = ldul_phys(pde_ptr);
 
                 switch (pde & PTE_ENTRYTYPE_MASK) {
                 default:
@@ -265,7 +265,7 @@ target_ulong mmu_probe(CPUState *env, ta
     /* Context base + context number */
     pde_ptr = (target_phys_addr_t)(env->mmuregs[1] << 4) +
         (env->mmuregs[2] << 2);
-    pde = ldl_phys(pde_ptr);
+    pde = ldul_phys(pde_ptr);
 
     switch (pde & PTE_ENTRYTYPE_MASK) {
     default:
@@ -277,7 +277,7 @@ target_ulong mmu_probe(CPUState *env, ta
         if (mmulev == 3)
             return pde;
         pde_ptr = ((address >> 22) & ~3) + ((pde & ~3) << 4);
-        pde = ldl_phys(pde_ptr);
+        pde = ldul_phys(pde_ptr);
 
         switch (pde & PTE_ENTRYTYPE_MASK) {
         default:
@@ -290,7 +290,7 @@ target_ulong mmu_probe(CPUState *env, ta
             if (mmulev == 2)
                 return pde;
             pde_ptr = ((address & 0xfc0000) >> 16) + ((pde & ~3) << 4);
-            pde = ldl_phys(pde_ptr);
+            pde = ldul_phys(pde_ptr);
 
             switch (pde & PTE_ENTRYTYPE_MASK) {
             default:
@@ -303,7 +303,7 @@ target_ulong mmu_probe(CPUState *env, ta
                 if (mmulev == 1)
                     return pde;
                 pde_ptr = ((address & 0x3f000) >> 10) + ((pde & ~3) << 4);
-                pde = ldl_phys(pde_ptr);
+                pde = ldul_phys(pde_ptr);
 
                 switch (pde & PTE_ENTRYTYPE_MASK) {
                 default:
@@ -330,7 +330,7 @@ void dump_mmu(CPUState *env)
 
     printf("MMU dump:\n");
     pde_ptr = (env->mmuregs[1] << 4) + (env->mmuregs[2] << 2);
-    pde = ldl_phys(pde_ptr);
+    pde = ldul_phys(pde_ptr);
     printf("Root ptr: " TARGET_FMT_plx ", ctx: %d\n",
            (target_phys_addr_t)env->mmuregs[1] << 4, env->mmuregs[2]);
     for (n = 0, va = 0; n < 256; n++, va += 16 * 1024 * 1024) {
Index: target-sparc/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/op_helper.c,v
retrieving revision 1.41
diff -u -d -d -p -r1.41 op_helper.c
--- target-sparc/op_helper.c	1 Oct 2007 17:07:58 -0000	1.41
+++ target-sparc/op_helper.c	13 Oct 2007 14:09:59 -0000
@@ -184,11 +184,11 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_code(T0 & ~3);
+            ret = ldul_code(T0 & ~3);
             break;
         case 8:
-            ret = ldl_code(T0 & ~3);
-            T0 = ldl_code((T0 + 4) & ~3);
+            ret = ldul_code(T0 & ~3);
+            T0 = ldul_code((T0 + 4) & ~3);
             break;
         }
         break;
@@ -202,11 +202,11 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_user(T0 & ~3);
+            ret = ldul_user(T0 & ~3);
             break;
         case 8:
-            ret = ldl_user(T0 & ~3);
-            T0 = ldl_user((T0 + 4) & ~3);
+            ret = ldul_user(T0 & ~3);
+            T0 = ldul_user((T0 + 4) & ~3);
             break;
         }
         break;
@@ -220,11 +220,11 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_kernel(T0 & ~3);
+            ret = ldul_kernel(T0 & ~3);
             break;
         case 8:
-            ret = ldl_kernel(T0 & ~3);
-            T0 = ldl_kernel((T0 + 4) & ~3);
+            ret = ldul_kernel(T0 & ~3);
+            T0 = ldul_kernel((T0 + 4) & ~3);
             break;
         }
         break;
@@ -243,11 +243,11 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_phys(T0 & ~3);
+            ret = ldul_phys(T0 & ~3);
             break;
         case 8:
-            ret = ldl_phys(T0 & ~3);
-            T0 = ldl_phys((T0 + 4) & ~3);
+            ret = ldul_phys(T0 & ~3);
+            T0 = ldul_phys((T0 + 4) & ~3);
             break;
         }
         break;
@@ -264,13 +264,13 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_phys((target_phys_addr_t)(T0 & ~3)
-                           | ((target_phys_addr_t)(asi & 0xf) << 32));
+            ret = ldul_phys((target_phys_addr_t)(T0 & ~3)
+                            | ((target_phys_addr_t)(asi & 0xf) << 32));
             break;
         case 8:
-            ret = ldl_phys((target_phys_addr_t)(T0 & ~3)
-                           | ((target_phys_addr_t)(asi & 0xf) << 32));
-            T0 = ldl_phys((target_phys_addr_t)((T0 + 4) & ~3)
+            ret = ldul_phys((target_phys_addr_t)(T0 & ~3)
+                            | ((target_phys_addr_t)(asi & 0xf) << 32));
+            T0 = ldul_phys((target_phys_addr_t)((T0 + 4) & ~3)
                            | ((target_phys_addr_t)(asi & 0xf) << 32));
             break;
         }
@@ -422,7 +422,7 @@ void helper_st_asi(int asi, int size)
             uint32_t src = T1 & ~3, dst = T0 & ~3, temp;
 
             for (i = 0; i < 32; i += 4, src += 4, dst += 4) {
-                temp = ldl_kernel(src);
+                temp = ldul_kernel(src);
                 stl_kernel(dst, temp);
             }
         }
@@ -525,7 +525,7 @@ void helper_ld_asi(int asi, int size, in
                 ret = lduw_raw(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_raw(T0 & ~3);
+                ret = ldul_raw(T0 & ~3);
                 break;
             default:
             case 8:
@@ -673,7 +673,7 @@ void helper_ld_asi(int asi, int size, in
                 ret = lduw_kernel(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_kernel(T0 & ~3);
+                ret = ldul_kernel(T0 & ~3);
                 break;
             default:
             case 8:
@@ -689,7 +689,7 @@ void helper_ld_asi(int asi, int size, in
                 ret = lduw_user(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_user(T0 & ~3);
+                ret = ldul_user(T0 & ~3);
                 break;
             default:
             case 8:
@@ -711,7 +711,7 @@ void helper_ld_asi(int asi, int size, in
                 ret = lduw_phys(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_phys(T0 & ~3);
+                ret = ldul_phys(T0 & ~3);
                 break;
             default:
             case 8:
@@ -1497,6 +1497,21 @@ static void do_unaligned_access(target_u
 #define ALIGNED_ONLY
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -1508,6 +1523,7 @@ static void do_unaligned_access(target_u
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 static void do_unaligned_access(target_ulong addr, int is_write, int is_user,
                                 void *retaddr)
Index: target-sparc/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/op_mem.h,v
retrieving revision 1.10
diff -u -d -d -p -r1.10 op_mem.h
--- target-sparc/op_mem.h	21 Sep 2007 19:10:53 -0000	1.10
+++ target-sparc/op_mem.h	13 Oct 2007 14:09:59 -0000
@@ -17,7 +17,7 @@ void OPPROTO glue(glue(op_, name), MEMSU
     glue(op, MEMSUFFIX)(T0, T1);                                      \
 }
 
-SPARC_LD_OP(ld, ldl);
+SPARC_LD_OP(ld, ldul);
 SPARC_LD_OP(ldub, ldub);
 SPARC_LD_OP(lduh, lduw);
 SPARC_LD_OP_S(ldsb, ldsb);
@@ -42,15 +42,15 @@ void OPPROTO glue(op_ldstub, MEMSUFFIX)(
 
 void OPPROTO glue(op_swap, MEMSUFFIX)(void)
 {
-    target_ulong tmp = glue(ldl, MEMSUFFIX)(T0);
+    target_ulong tmp = glue(ldul, MEMSUFFIX)(T0);
     glue(stl, MEMSUFFIX)(T0, T1);       /* XXX: Should be Atomically */
     T1 = tmp;
 }
 
 void OPPROTO glue(op_ldd, MEMSUFFIX)(void)
 {
-    T1 = glue(ldl, MEMSUFFIX)(T0);
-    T0 = glue(ldl, MEMSUFFIX)((T0 + 4));
+    T1 = glue(ldul, MEMSUFFIX)(T0);
+    T0 = glue(ldul, MEMSUFFIX)((T0 + 4));
 }
 
 /***                         Floating-point store                          ***/
@@ -78,12 +78,12 @@ void OPPROTO glue(op_lddf, MEMSUFFIX) (v
 #ifdef TARGET_SPARC64
 void OPPROTO glue(op_lduw, MEMSUFFIX)(void)
 {
-    T1 = (uint64_t)(glue(ldl, MEMSUFFIX)(T0) & 0xffffffff);
+    T1 = (uint64_t)(glue(ldul, MEMSUFFIX)(T0) & 0xffffffff);
 }
 
 void OPPROTO glue(op_ldsw, MEMSUFFIX)(void)
 {
-    T1 = (int64_t)(glue(ldl, MEMSUFFIX)(T0) & 0xffffffff);
+    T1 = (int64_t)(glue(ldul, MEMSUFFIX)(T0) & 0xffffffff);
 }
 
 SPARC_LD_OP(ldx, ldq);
Index: target-sparc/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/translate.c,v
retrieving revision 1.74
diff -u -d -d -p -r1.74 translate.c
--- target-sparc/translate.c	10 Oct 2007 19:11:54 -0000	1.74
+++ target-sparc/translate.c	13 Oct 2007 14:09:59 -0000
@@ -1089,7 +1089,7 @@ static void disas_sparc_insn(DisasContex
 {
     unsigned int insn, opc, rs1, rs2, rd;
 
-    insn = ldl_code(dc->pc);
+    insn = ldul_code(dc->pc);
     opc = GET_FIELD(insn, 0, 1);
 
     rd = GET_FIELD(insn, 2, 6);

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] RFC: reverse-endian softmmu memory accessors
  2007-10-13 14:17       ` J. Mayer
@ 2007-10-13 22:07         ` J. Mayer
  2007-10-13 22:53           ` Thiemo Seufer
  2007-10-14  8:19           ` Blue Swirl
  0 siblings, 2 replies; 20+ messages in thread
From: J. Mayer @ 2007-10-13 22:07 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 2880 bytes --]

On Sat, 2007-10-13 at 16:17 +0200, J. Mayer wrote:
> On Sat, 2007-10-13 at 16:07 +0300, Blue Swirl wrote:
> > On 10/13/07, J. Mayer <l_indien@magic.fr> wrote:
> > > On Sat, 2007-10-13 at 13:47 +0300, Blue Swirl wrote:
> > > > On 10/13/07, J. Mayer <l_indien@magic.fr> wrote:
> > > > > The problem:
> > > > > > some CPU architectures, namely PowerPC and maybe others, offer
> > > > > > facilities to access the memory or I/O in the reverse endianness, ie
> > > > > > little-endian instead of big-endian for PowerPC, or provide instructions
> > > > > > to make memory accesses in the "reverse-endian". This is implemented as
> > > > > > a global flag on some CPU. This case is already handled by the PowerPC
> > > > > > emulation but it is far from being optimal. Some other implementations
> > > > > > allow the OS to store a "reverse-endian" flag in the TLB or the segment
> > > > > > descriptors, thus providing per-page or per-segment endianness control.
> > > > > > This is mostly used to ease driver migration from a PC platform to
> > > > > > PowerPC without taking any care of the device endianness in the driver
> > > > > > code (yes, this is bad...).
> > > >
> > > > Nice, this may be useful for Sparc64. It has a global CPU flag for
> > > > endianness, individual pages can be marked as reverse endian, and
> > > > finally there are instructions that access memory in reverse endian.
> > > > The end result is a XOR of all these reverses. Though I don't know if
> > > > any of these features are used at all.
> > >
> > > > I realized that I/O accesses for reverse-endian pages were not correct
> > > > in the softmmu_template.h header. This new version fixes this. It also
> > > > removes duplicated code in the case of unaligned accesses in a
> > > > reverse-endian page.
> > 
> > I think 64 bit access case is not handled correctly, but to solve that
> > it would be nice to extend the current IO access system to 64 bits.
> 
> > I think that if it was previously correct, it should still be, but... I
> > don't know how interesting 64-bit I/O accesses are, as I
> > don't know if there are real hw buses that have a 64-bit data path...
> 
> > Here's another version taking care of your remark about ldl memory
> > accessors.
> > * I replaced all ldl occurrences with ldul
> > * when TARGET_LONG_BITS == 64, I also added ldsl accessors. And I
> > started using them in the PowerPC memory access micro-ops.
> > As a result, the patch is considerably more invasive than the previous ones.
> > It still does not seem to break the PowerPC or i386 targets.

Here's a new version. The only change is that, for consistency, I
added the big-endian and little-endian accessors that were documented in
cpu-all.h as unimplemented. The implementation is quite trivial, given that
native and reverse-endian accessors are available, and it changes nothing
functionally compared to the previous version.

-- 
J. Mayer <l_indien@magic.fr>
Never organized

[-- Attachment #2: softmmu_reverse_endian.diff --]
[-- Type: text/x-patch, Size: 181329 bytes --]

Index: cpu-all.h
===================================================================
RCS file: /sources/qemu/qemu/cpu-all.h,v
retrieving revision 1.76
diff -u -d -d -p -r1.76 cpu-all.h
--- cpu-all.h	23 Sep 2007 15:28:03 -0000	1.76
+++ cpu-all.h	13 Oct 2007 22:00:07 -0000
@@ -161,9 +161,9 @@ typedef union {
  *
  * endian is:
  * (empty): target cpu endianness or 8 bit access
- *   r    : reversed target cpu endianness (not implemented yet)
- *   be   : big endian (not implemented yet)
- *   le   : little endian (not implemented yet)
+ *   r    : reversed target cpu endianness
+ *   be   : big endian
+ *   le   : little endian
  *
  * access_type is:
  *   raw    : host memory access
@@ -215,7 +215,32 @@ static inline int ldsw_le_p(void *ptr)
 #endif
 }
 
-static inline int ldl_le_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_le_p(void *ptr)
+{
+#ifdef __powerpc__
+    int val;
+    __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+    return (uint32_t)val;
+#else
+    uint8_t *p = ptr;
+    return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
+#endif
+}
+
+static inline int64_t ldsl_le_p(void *ptr)
+{
+#ifdef __powerpc__
+    int val;
+    __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+    return (int32_t)val;
+#else
+    uint8_t *p = ptr;
+    return (int32_t)(p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24));
+#endif
+}
+#else
+static inline int ldul_le_p(void *ptr)
 {
 #ifdef __powerpc__
     int val;
@@ -226,13 +251,14 @@ static inline int ldl_le_p(void *ptr)
     return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
 #endif
 }
+#endif
 
 static inline uint64_t ldq_le_p(void *ptr)
 {
     uint8_t *p = ptr;
     uint32_t v1, v2;
-    v1 = ldl_le_p(p);
-    v2 = ldl_le_p(p + 4);
+    v1 = ldul_le_p(p);
+    v2 = ldul_le_p(p + 4);
     return v1 | ((uint64_t)v2 << 32);
 }
 
@@ -275,7 +301,7 @@ static inline float32 ldfl_le_p(void *pt
         float32 f;
         uint32_t i;
     } u;
-    u.i = ldl_le_p(ptr);
+    u.i = ldul_le_p(ptr);
     return u.f;
 }
 
@@ -292,8 +318,8 @@ static inline void stfl_le_p(void *ptr, 
 static inline float64 ldfq_le_p(void *ptr)
 {
     CPU_DoubleU u;
-    u.l.lower = ldl_le_p(ptr);
-    u.l.upper = ldl_le_p(ptr + 4);
+    u.l.lower = ldul_le_p(ptr);
+    u.l.upper = ldul_le_p(ptr + 4);
     return u.d;
 }
 
@@ -317,10 +343,22 @@ static inline int ldsw_le_p(void *ptr)
     return *(int16_t *)ptr;
 }
 
-static inline int ldl_le_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_le_p(void *ptr)
+{
+    return *(uint32_t *)ptr;
+}
+
+static inline int64_t ldsl_le_p(void *ptr)
+{
+    return *(int32_t *)ptr;
+}
+#else
+static inline int ldul_le_p(void *ptr)
 {
     return *(uint32_t *)ptr;
 }
+#endif
 
 static inline uint64_t ldq_le_p(void *ptr)
 {
@@ -397,7 +435,38 @@ static inline int ldsw_be_p(void *ptr)
 #endif
 }
 
-static inline int ldl_be_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_be_p(void *ptr)
+{
+#if defined(__i386__) || defined(__x86_64__)
+    int val;
+    asm volatile ("movl %1, %0\n"
+                  "bswap %0\n"
+                  : "=r" (val)
+                  : "m" (*(uint32_t *)ptr));
+    return (uint32_t)val;
+#else
+    uint8_t *b = (uint8_t *) ptr;
+    return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
+#endif
+}
+
+static inline int64_t ldsl_be_p(void *ptr)
+{
+#if defined(__i386__) || defined(__x86_64__)
+    int val;
+    asm volatile ("movl %1, %0\n"
+                  "bswap %0\n"
+                  : "=r" (val)
+                  : "m" (*(uint32_t *)ptr));
+    return (int32_t)val;
+#else
+    uint8_t *b = (uint8_t *) ptr;
+    return (int32_t)((b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]);
+#endif
+}
+#else
+static inline int ldul_be_p(void *ptr)
 {
 #if defined(__i386__) || defined(__x86_64__)
     int val;
@@ -411,12 +480,13 @@ static inline int ldl_be_p(void *ptr)
     return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
 #endif
 }
+#endif
 
 static inline uint64_t ldq_be_p(void *ptr)
 {
     uint32_t a,b;
-    a = ldl_be_p(ptr);
-    b = ldl_be_p(ptr+4);
+    a = ldul_be_p(ptr);
+    b = ldul_be_p(ptr+4);
     return (((uint64_t)a<<32)|b);
 }
 
@@ -464,7 +534,7 @@ static inline float32 ldfl_be_p(void *pt
         float32 f;
         uint32_t i;
     } u;
-    u.i = ldl_be_p(ptr);
+    u.i = ldul_be_p(ptr);
     return u.f;
 }
 
@@ -481,8 +551,8 @@ static inline void stfl_be_p(void *ptr, 
 static inline float64 ldfq_be_p(void *ptr)
 {
     CPU_DoubleU u;
-    u.l.upper = ldl_be_p(ptr);
-    u.l.lower = ldl_be_p(ptr + 4);
+    u.l.upper = ldul_be_p(ptr);
+    u.l.lower = ldul_be_p(ptr + 4);
     return u.d;
 }
 
@@ -506,10 +576,22 @@ static inline int ldsw_be_p(void *ptr)
     return *(int16_t *)ptr;
 }
 
-static inline int ldl_be_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_be_p(void *ptr)
+{
+    return *(uint32_t *)ptr;
+}
+
+static inline int64_t ldsl_be_p(void *ptr)
+{
+    return *(int32_t *)ptr;
+}
+#else
+static inline int ldul_be_p(void *ptr)
 {
     return *(uint32_t *)ptr;
 }
+#endif
 
 static inline uint64_t ldq_be_p(void *ptr)
 {
@@ -557,9 +639,13 @@ static inline void stfq_be_p(void *ptr, 
 
 /* target CPU memory access functions */
 #if defined(TARGET_WORDS_BIGENDIAN)
+/* native-endian */
 #define lduw_p(p) lduw_be_p(p)
 #define ldsw_p(p) ldsw_be_p(p)
-#define ldl_p(p) ldl_be_p(p)
+#define ldul_p(p) ldul_be_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_p(p) ldsl_be_p(p)
+#endif
 #define ldq_p(p) ldq_be_p(p)
 #define ldfl_p(p) ldfl_be_p(p)
 #define ldfq_p(p) ldfq_be_p(p)
@@ -568,10 +654,29 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_p(p, v) stq_be_p(p, v)
 #define stfl_p(p, v) stfl_be_p(p, v)
 #define stfq_p(p, v) stfq_be_p(p, v)
+/* reverse-endian */
+#define lduwr_p(p) lduw_le_p(p)
+#define ldswr_p(p) ldsw_le_p(p)
+#define ldulr_p(p) ldul_le_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_p(p) ldsl_le_p(p)
+#endif
+#define ldqr_p(p) ldq_le_p(p)
+#define ldflr_p(p) ldfl_le_p(p)
+#define ldfqr_p(p) ldfq_le_p(p)
+#define stwr_p(p, v) stw_le_p(p, v)
+#define stlr_p(p, v) stl_le_p(p, v)
+#define stqr_p(p, v) stq_le_p(p, v)
+#define stflr_p(p, v) stfl_le_p(p, v)
+#define stfqr_p(p, v) stfq_le_p(p, v)
 #else
+/* native-endian */
 #define lduw_p(p) lduw_le_p(p)
 #define ldsw_p(p) ldsw_le_p(p)
-#define ldl_p(p) ldl_le_p(p)
+#define ldul_p(p) ldul_le_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_p(p) ldsl_le_p(p)
+#endif
 #define ldq_p(p) ldq_le_p(p)
 #define ldfl_p(p) ldfl_le_p(p)
 #define ldfq_p(p) ldfq_le_p(p)
@@ -580,6 +685,21 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_p(p, v) stq_le_p(p, v)
 #define stfl_p(p, v) stfl_le_p(p, v)
 #define stfq_p(p, v) stfq_le_p(p, v)
+/* reverse-endian */
+#define lduwr_p(p) lduw_be_p(p)
+#define ldswr_p(p) ldsw_be_p(p)
+#define ldulr_p(p) ldul_be_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_p(p) ldsl_be_p(p)
+#endif
+#define ldqr_p(p) ldq_be_p(p)
+#define ldflr_p(p) ldfl_be_p(p)
+#define ldfqr_p(p) ldfq_be_p(p)
+#define stwr_p(p, v) stw_be_p(p, v)
+#define stlr_p(p, v) stl_be_p(p, v)
+#define stqr_p(p, v) stq_be_p(p, v)
+#define stflr_p(p, v) stfl_be_p(p, v)
+#define stfqr_p(p, v) stfq_be_p(p, v)
 #endif
 
 /* MMU memory access macros */
@@ -605,11 +725,15 @@ static inline void stfq_be_p(void *ptr, 
 #define laddr(x) (uint8_t *)(long)(x)
 #endif
 
+/* native-endian */
 #define ldub_raw(p) ldub_p(laddr((p)))
 #define ldsb_raw(p) ldsb_p(laddr((p)))
 #define lduw_raw(p) lduw_p(laddr((p)))
 #define ldsw_raw(p) ldsw_p(laddr((p)))
-#define ldl_raw(p) ldl_p(laddr((p)))
+#define ldul_raw(p) ldul_p(laddr((p)))
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_raw(p) ldsl_p(laddr((p)))
+#endif
 #define ldq_raw(p) ldq_p(laddr((p)))
 #define ldfl_raw(p) ldfl_p(laddr((p)))
 #define ldfq_raw(p) ldfq_p(laddr((p)))
@@ -619,16 +743,112 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_raw(p, v) stq_p(saddr((p)), v)
 #define stfl_raw(p, v) stfl_p(saddr((p)), v)
 #define stfq_raw(p, v) stfq_p(saddr((p)), v)
-
+/* reverse endian */
+#define ldubr_raw(p) ldub_p(laddr((p)))
+#define ldsbr_raw(p) ldsb_p(laddr((p)))
+#define lduwr_raw(p) lduwr_p(laddr((p)))
+#define ldswr_raw(p) ldswr_p(laddr((p)))
+#define ldulr_raw(p) ldulr_p(laddr((p)))
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_raw(p) ldslr_p(laddr((p)))
+#endif
+#define ldqr_raw(p) ldqr_p(laddr((p)))
+#define ldflr_raw(p) ldflr_p(laddr((p)))
+#define ldfqr_raw(p) ldfqr_p(laddr((p)))
+#define stbr_raw(p, v) stb_p(saddr((p)), v)
+#define stwr_raw(p, v) stwr_p(saddr((p)), v)
+#define stlr_raw(p, v) stlr_p(saddr((p)), v)
+#define stqr_raw(p, v) stqr_p(saddr((p)), v)
+#define stflr_raw(p, v) stflr_p(saddr((p)), v)
+#define stfqr_raw(p, v) stfqr_p(saddr((p)), v)
+#if defined(TARGET_WORDS_BIGENDIAN)
+/* big-endian */
+#define ldub_be_raw(p) ldub_raw(p)
+#define ldsb_be_raw(p) ldsb_raw(p)
+#define lduw_be_raw(p) lduw_raw(p)
+#define ldsw_be_raw(p) ldsw_raw(p)
+#define ldul_be_raw(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_raw(p) ldsl_raw(p)
+#endif
+#define ldq_be_raw(p) ldq_raw(p)
+#define ldfl_be_raw(p) ldfl_raw(p)
+#define ldfq_be_raw(p) ldfq_raw(p)
+#define stb_be_raw(p, v) stb_raw(p, v)
+#define stw_be_raw(p, v) stw_raw(p, v)
+#define stl_be_raw(p, v) stl_raw(p, v)
+#define stq_be_raw(p, v) stq_raw(p, v)
+#define stfl_be_raw(p, v) stfl_raw(p, v)
+#define stfq_be_raw(p, v) stfq_raw(p, v)
+/* little-endian */
+#define ldub_le_raw(p) ldubr_raw(p)
+#define ldsb_le_raw(p) ldsbr_raw(p)
+#define lduw_le_raw(p) lduwr_raw(p)
+#define ldsw_le_raw(p) ldswr_raw(p)
+#define ldul_le_raw(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_raw(p) ldslr_raw(p)
+#endif
+#define ldq_le_raw(p) ldqr_raw(p)
+#define ldfl_le_raw(p) ldflr_raw(p)
+#define ldfq_le_raw(p) ldfqr_raw(p)
+#define stb_le_raw(p, v) stbr_raw(p, v)
+#define stw_le_raw(p, v) stwr_raw(p, v)
+#define stl_le_raw(p, v) stlr_raw(p, v)
+#define stq_le_raw(p, v) stqr_raw(p, v)
+#define stfl_le_raw(p, v) stflr_raw(p, v)
+#define stfq_le_raw(p, v) stfqr_raw(p, v)
+#else
+/* big-endian */
+#define ldub_be_raw(p) ldubr_raw(p)
+#define ldsb_be_raw(p) ldsbr_raw(p)
+#define lduw_be_raw(p) lduwr_raw(p)
+#define ldsw_be_raw(p) ldswr_raw(p)
+#define ldul_be_raw(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_raw(p) ldslr_raw(p)
+#endif
+#define ldq_be_raw(p) ldqr_raw(p)
+#define ldfl_be_raw(p) ldflr_raw(p)
+#define ldfq_be_raw(p) ldfqr_raw(p)
+#define stb_be_raw(p, v) stbr_raw(p, v)
+#define stw_be_raw(p, v) stwr_raw(p, v)
+#define stl_be_raw(p, v) stlr_raw(p, v)
+#define stq_be_raw(p, v) stqr_raw(p, v)
+#define stfl_be_raw(p, v) stflr_raw(p, v)
+#define stfq_be_raw(p, v) stfqr_raw(p, v)
+/* little-endian */
+#define ldub_le_raw(p) ldub_raw(p)
+#define ldsb_le_raw(p) ldsb_raw(p)
+#define lduw_le_raw(p) lduw_raw(p)
+#define ldsw_le_raw(p) ldsw_raw(p)
+#define ldul_le_raw(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_raw(p) ldsl_raw(p)
+#endif
+#define ldq_le_raw(p) ldq_raw(p)
+#define ldfl_le_raw(p) ldfl_raw(p)
+#define ldfq_le_raw(p) ldfq_raw(p)
+#define stb_le_raw(p, v) stb_raw(p, v)
+#define stw_le_raw(p, v) stw_raw(p, v)
+#define stl_le_raw(p, v) stl_raw(p, v)
+#define stq_le_raw(p, v) stq_raw(p, v)
+#define stfl_le_raw(p, v) stfl_raw(p, v)
+#define stfq_le_raw(p, v) stfq_raw(p, v)
+#endif
 
 #if defined(CONFIG_USER_ONLY)
 
 /* if user mode, no other memory access functions */
+/* native-endian */
 #define ldub(p) ldub_raw(p)
 #define ldsb(p) ldsb_raw(p)
 #define lduw(p) lduw_raw(p)
 #define ldsw(p) ldsw_raw(p)
-#define ldl(p) ldl_raw(p)
+#define ldul(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl(p) ldsl_raw(p)
+#endif
 #define ldq(p) ldq_raw(p)
 #define ldfl(p) ldfl_raw(p)
 #define ldfq(p) ldfq_raw(p)
@@ -638,19 +858,173 @@ static inline void stfq_be_p(void *ptr, 
 #define stq(p, v) stq_raw(p, v)
 #define stfl(p, v) stfl_raw(p, v)
 #define stfq(p, v) stfq_raw(p, v)
+/* reverse-endian */
+#define ldubr(p) ldub_raw(p)
+#define ldsbr(p) ldsb_raw(p)
+#define lduwr(p) lduwr_raw(p)
+#define ldswr(p) ldswr_raw(p)
+#define ldulr(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr(p) ldslr_raw(p)
+#endif
+#define ldqr(p) ldqr_raw(p)
+#define ldflr(p) ldflr_raw(p)
+#define ldfqr(p) ldfqr_raw(p)
+#define stbr(p, v) stb_raw(p, v)
+#define stwr(p, v) stwr_raw(p, v)
+#define stlr(p, v) stlr_raw(p, v)
+#define stqr(p, v) stqr_raw(p, v)
+#define stflr(p, v) stflr_raw(p, v)
+#define stfqr(p, v) stfqr_raw(p, v)
+#if defined(TARGET_WORDS_BIGENDIAN)
+/* big-endian */
+#define ldub_be(p) ldub(p)
+#define ldsb_be(p) ldsb(p)
+#define lduw_be(p) lduw(p)
+#define ldsw_be(p) ldsw(p)
+#define ldul_be(p) ldul(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be(p) ldsl(p)
+#endif
+#define ldq_be(p) ldq(p)
+#define ldfl_be(p) ldfl(p)
+#define ldfq_be(p) ldfq(p)
+#define stb_be(p, v) stb(p, v)
+#define stw_be(p, v) stw(p, v)
+#define stl_be(p, v) stl(p, v)
+#define stq_be(p, v) stq(p, v)
+#define stfl_be(p, v) stfl(p, v)
+#define stfq_be(p, v) stfq(p, v)
+/* little-endian */
+#define ldub_le(p) ldubr(p)
+#define ldsb_le(p) ldsbr(p)
+#define lduw_le(p) lduwr(p)
+#define ldsw_le(p) ldswr(p)
+#define ldul_le(p) ldulr(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le(p) ldslr(p)
+#endif
+#define ldq_le(p) ldqr(p)
+#define ldfl_le(p) ldflr(p)
+#define ldfq_le(p) ldfqr(p)
+#define stb_le(p, v) stbr(p, v)
+#define stw_le(p, v) stwr(p, v)
+#define stl_le(p, v) stlr(p, v)
+#define stq_le(p, v) stqr(p, v)
+#define stfl_le(p, v) stflr(p, v)
+#define stfq_le(p, v) stfqr(p, v)
+#else
+/* big-endian */
+#define ldub_be(p) ldubr(p)
+#define ldsb_be(p) ldsbr(p)
+#define lduw_be(p) lduwr(p)
+#define ldsw_be(p) ldswr(p)
+#define ldul_be(p) ldulr(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be(p) ldslr(p)
+#endif
+#define ldq_be(p) ldqr(p)
+#define ldfl_be(p) ldflr(p)
+#define ldfq_be(p) ldfqr(p)
+#define stb_be(p, v) stbr(p, v)
+#define stw_be(p, v) stwr(p, v)
+#define stl_be(p, v) stlr(p, v)
+#define stq_be(p, v) stqr(p, v)
+#define stfl_be(p, v) stflr(p, v)
+#define stfq_be(p, v) stfqr(p, v)
+/* little-endian */
+#define ldub_le(p) ldub(p)
+#define ldsb_le(p) ldsb(p)
+#define lduw_le(p) lduw(p)
+#define ldsw_le(p) ldsw(p)
+#define ldul_le(p) ldul(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le(p) ldsl(p)
+#endif
+#define ldq_le(p) ldq(p)
+#define ldfl_le(p) ldfl(p)
+#define ldfq_le(p) ldfq(p)
+#define stb_le(p, v) stb(p, v)
+#define stw_le(p, v) stw(p, v)
+#define stl_le(p, v) stl(p, v)
+#define stq_le(p, v) stq(p, v)
+#define stfl_le(p, v) stfl(p, v)
+#define stfq_le(p, v) stfq(p, v)
+#endif
 
+/* native-endian */
 #define ldub_code(p) ldub_raw(p)
 #define ldsb_code(p) ldsb_raw(p)
 #define lduw_code(p) lduw_raw(p)
 #define ldsw_code(p) ldsw_raw(p)
-#define ldl_code(p) ldl_raw(p)
+#define ldul_code(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_code(p) ldsl_raw(p)
+#endif
 #define ldq_code(p) ldq_raw(p)
+/* reverse-endian */
+#define ldubr_code(p) ldub_raw(p)
+#define ldsbr_code(p) ldsb_raw(p)
+#define lduwr_code(p) lduwr_raw(p)
+#define ldswr_code(p) ldswr_raw(p)
+#define ldulr_code(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_code(p) ldslr_raw(p)
+#endif
+#define ldqr_code(p) ldqr_raw(p)
+#if defined(TARGET_WORDS_BIGENDIAN)
+/* big-endian */
+#define ldub_be_code(p) ldub_code(p)
+#define ldsb_be_code(p) ldsb_code(p)
+#define lduw_be_code(p) lduw_code(p)
+#define ldsw_be_code(p) ldsw_code(p)
+#define ldul_be_code(p) ldul_code(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_code(p) ldsl_code(p)
+#endif
+#define ldq_be_code(p) ldq_code(p)
+/* little-endian */
+#define ldub_le_code(p) ldubr_code(p)
+#define ldsb_le_code(p) ldsbr_code(p)
+#define lduw_le_code(p) lduwr_code(p)
+#define ldsw_le_code(p) ldswr_code(p)
+#define ldul_le_code(p) ldulr_code(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_code(p) ldslr_code(p)
+#endif
+#define ldq_le_code(p) ldqr_code(p)
+#else
+/* big-endian */
+#define ldub_be_code(p) ldubr_code(p)
+#define ldsb_be_code(p) ldsbr_code(p)
+#define lduw_be_code(p) lduwr_code(p)
+#define ldsw_be_code(p) ldswr_code(p)
+#define ldul_be_code(p) ldulr_code(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_code(p) ldslr_code(p)
+#endif
+#define ldq_be_code(p) ldqr_code(p)
+/* little-endian */
+#define ldub_le_code(p) ldub_code(p)
+#define ldsb_le_code(p) ldsb_code(p)
+#define lduw_le_code(p) lduw_code(p)
+#define ldsw_le_code(p) ldsw_code(p)
+#define ldul_le_code(p) ldul_code(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_code(p) ldsl_code(p)
+#endif
+#define ldq_le_code(p) ldq_code(p)
+#endif
 
+/* native-endian */
 #define ldub_kernel(p) ldub_raw(p)
 #define ldsb_kernel(p) ldsb_raw(p)
 #define lduw_kernel(p) lduw_raw(p)
 #define ldsw_kernel(p) ldsw_raw(p)
-#define ldl_kernel(p) ldl_raw(p)
+#define ldul_kernel(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_kernel(p) ldsl_raw(p)
+#endif
 #define ldq_kernel(p) ldq_raw(p)
 #define ldfl_kernel(p) ldfl_raw(p)
 #define ldfq_kernel(p) ldfq_raw(p)
@@ -660,6 +1034,99 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_kernel(p, v) stq_raw(p, v)
 #define stfl_kernel(p, v) stfl_raw(p, v)
 #define stfq_kernel(p, vt) stfq_raw(p, v)
+/* reverse-endian */
+#define ldubr_kernel(p) ldub_raw(p)
+#define ldsbr_kernel(p) ldsb_raw(p)
+#define lduwr_kernel(p) lduwr_raw(p)
+#define ldswr_kernel(p) ldswr_raw(p)
+#define ldulr_kernel(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_kernel(p) ldslr_raw(p)
+#endif
+#define ldqr_kernel(p) ldqr_raw(p)
+#define ldflr_kernel(p) ldflr_raw(p)
+#define ldfqr_kernel(p) ldfqr_raw(p)
+#define stbr_kernel(p, v) stbr_raw(p, v)
+#define stwr_kernel(p, v) stwr_raw(p, v)
+#define stlr_kernel(p, v) stlr_raw(p, v)
+#define stqr_kernel(p, v) stqr_raw(p, v)
+#define stflr_kernel(p, v) stflr_raw(p, v)
+#define stfqr_kernel(p, vt) stfqr_raw(p, v)
+#if defined(TARGET_WORDS_BIGENDIAN)
+/* big-endian */
+#define ldub_be_kernel(p) ldub_kernel(p)
+#define ldsb_be_kernel(p) ldsb_kernel(p)
+#define lduw_be_kernel(p) lduw_kernel(p)
+#define ldsw_be_kernel(p) ldsw_kernel(p)
+#define ldul_be_kernel(p) ldul_kernel(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_kernel(p) ldsl_kernel(p)
+#endif
+#define ldq_be_kernel(p) ldq_kernel(p)
+#define ldfl_be_kernel(p) ldfl_kernel(p)
+#define ldfq_be_kernel(p) ldfq_kernel(p)
+#define stb_be_kernel(p, v) stb_kernel(p, v)
+#define stw_be_kernel(p, v) stw_kernel(p, v)
+#define stl_be_kernel(p, v) stl_kernel(p, v)
+#define stq_be_kernel(p, v) stq_kernel(p, v)
+#define stfl_be_kernel(p, v) stfl_kernel(p, v)
+#define stfq_be_kernel(p, vt) stfq_kernel(p, vt)
+/* little-endian */
+#define ldub_le_kernel(p) ldubr_kernel(p)
+#define ldsb_le_kernel(p) ldsbr_kernel(p)
+#define lduw_le_kernel(p) lduwr_kernel(p)
+#define ldsw_le_kernel(p) ldswr_kernel(p)
+#define ldul_le_kernel(p) ldulr_kernel(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_kernel(p) ldslr_kernel(p)
+#endif
+#define ldq_le_kernel(p) ldqr_kernel(p)
+#define ldfl_le_kernel(p) ldflr_kernel(p)
+#define ldfq_le_kernel(p) ldfqr_kernel(p)
+#define stb_le_kernel(p, v) stbr_kernel(p, v)
+#define stw_le_kernel(p, v) stwr_kernel(p, v)
+#define stl_le_kernel(p, v) stlr_kernel(p, v)
+#define stq_le_kernel(p, v) stqr_kernel(p, v)
+#define stfl_le_kernel(p, v) stflr_kernel(p, v)
+#define stfq_le_kernel(p, vt) stfqr_kernel(p, vt)
+#else
+/* big-endian */
+#define ldub_be_kernel(p) ldubr_kernel(p)
+#define ldsb_be_kernel(p) ldsbr_kernel(p)
+#define lduw_be_kernel(p) lduwr_kernel(p)
+#define ldsw_be_kernel(p) ldswr_kernel(p)
+#define ldul_be_kernel(p) ldulr_kernel(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_kernel(p) ldslr_kernel(p)
+#endif
+#define ldq_be_kernel(p) ldqr_kernel(p)
+#define ldfl_be_kernel(p) ldflr_kernel(p)
+#define ldfq_be_kernel(p) ldfqr_kernel(p)
+#define stb_be_kernel(p, v) stbr_kernel(p, v)
+#define stw_be_kernel(p, v) stwr_kernel(p, v)
+#define stl_be_kernel(p, v) stlr_kernel(p, v)
+#define stq_be_kernel(p, v) stqr_kernel(p, v)
+#define stfl_be_kernel(p, v) stflr_kernel(p, v)
+#define stfq_be_kernel(p, vt) stfqr_kernel(p, vt)
+/* little-endian */
+#define ldub_le_kernel(p) ldub_kernel(p)
+#define ldsb_le_kernel(p) ldsb_kernel(p)
+#define lduw_le_kernel(p) lduw_kernel(p)
+#define ldsw_le_kernel(p) ldsw_kernel(p)
+#define ldul_le_kernel(p) ldul_kernel(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_kernel(p) ldsl_kernel(p)
+#endif
+#define ldq_le_kernel(p) ldq_kernel(p)
+#define ldfl_le_kernel(p) ldfl_kernel(p)
+#define ldfq_le_kernel(p) ldfq_kernel(p)
+#define stb_le_kernel(p, v) stb_kernel(p, v)
+#define stw_le_kernel(p, v) stw_kernel(p, v)
+#define stl_le_kernel(p, v) stl_kernel(p, v)
+#define stq_le_kernel(p, v) stq_kernel(p, v)
+#define stfl_le_kernel(p, v) stfl_kernel(p, v)
+#define stfq_le_kernel(p, vt) stfq_kernel(p, vt)
+#endif
 
 #endif /* defined(CONFIG_USER_ONLY) */
 
@@ -790,6 +1257,8 @@ extern uint8_t *phys_ram_dirty;
    the physical address */
 #define IO_MEM_ROMD        (1)
 #define IO_MEM_SUBPAGE     (2)
+/* On some target CPUs, endianness is stored in page tables */
+#define IO_MEM_REVERSE     (3)
 
 typedef void CPUWriteMemoryFunc(void *opaque, target_phys_addr_t addr, uint32_t value);
 typedef uint32_t CPUReadMemoryFunc(void *opaque, target_phys_addr_t addr);
@@ -821,7 +1290,7 @@ static inline void cpu_physical_memory_w
 }
 uint32_t ldub_phys(target_phys_addr_t addr);
 uint32_t lduw_phys(target_phys_addr_t addr);
-uint32_t ldl_phys(target_phys_addr_t addr);
+uint32_t ldul_phys(target_phys_addr_t addr);
 uint64_t ldq_phys(target_phys_addr_t addr);
 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val);
 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val);
Index: cpu-exec.c
===================================================================
RCS file: /sources/qemu/qemu/cpu-exec.c,v
retrieving revision 1.119
diff -u -d -d -p -r1.119 cpu-exec.c
--- cpu-exec.c	8 Oct 2007 13:16:13 -0000	1.119
+++ cpu-exec.c	13 Oct 2007 22:00:07 -0000
@@ -436,12 +436,12 @@ int cpu_exec(CPUState *env1)
                          /* FIXME: this should respect TPR */
                          env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
                          svm_check_intercept(SVM_EXIT_VINTR);
-                         intno = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector));
+                         intno = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector));
                          if (loglevel & CPU_LOG_TB_IN_ASM)
                              fprintf(logfile, "Servicing virtual hardware INT=0x%02x\n", intno);
 	                 do_interrupt(intno, 0, 0, -1, 1);
                          stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl),
-                                  ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)) & ~V_IRQ_MASK);
+                                  ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)) & ~V_IRQ_MASK);
 #if defined(__sparc__) && !defined(HOST_SOLARIS)
                          tmp_T0 = 0;
 #else
Index: exec-all.h
===================================================================
RCS file: /sources/qemu/qemu/exec-all.h,v
retrieving revision 1.67
diff -u -d -d -p -r1.67 exec-all.h
--- exec-all.h	8 Oct 2007 13:16:14 -0000	1.67
+++ exec-all.h	13 Oct 2007 22:00:07 -0000
@@ -569,6 +569,21 @@ void tlb_fill(target_ulong addr, int is_
 #define MEMSUFFIX _code
 #define env cpu_single_env
 
+/* native-endian */
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+
+/* reverse-endian */
+#define REVERSE_ENDIAN
 #define DATA_SIZE 1
 #include "softmmu_header.h"
 
@@ -580,6 +595,7 @@ void tlb_fill(target_ulong addr, int is_
 
 #define DATA_SIZE 8
 #include "softmmu_header.h"
+#undef REVERSE_ENDIAN
 
 #undef ACCESS_TYPE
 #undef MEMSUFFIX
Index: exec.c
===================================================================
RCS file: /sources/qemu/qemu/exec.c,v
retrieving revision 1.108
diff -u -d -d -p -r1.108 exec.c
--- exec.c	8 Oct 2007 13:16:14 -0000	1.108
+++ exec.c	13 Oct 2007 22:00:08 -0000
@@ -2202,7 +2202,7 @@ static uint32_t watch_mem_readw(void *op
 
 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
 {
-    return ldl_phys(addr);
+    return ldul_phys(addr);
 }
 
 /* Generate a debug exception if a watchpoint has been hit.
@@ -2507,7 +2507,7 @@ void cpu_physical_memory_rw(target_phys_
     uint8_t *ptr;
     uint32_t val;
     target_phys_addr_t page;
-    unsigned long pd;
+    unsigned long pd, addr1;
     PhysPageDesc *p;
 
     while (len > 0) {
@@ -2524,31 +2524,54 @@ void cpu_physical_memory_rw(target_phys_
 
         if (is_write) {
             if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
-                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
-                /* XXX: could force cpu_single_env to NULL to avoid
-                   potential bugs */
-                if (l >= 4 && ((addr & 3) == 0)) {
-                    /* 32 bit write access */
-                    val = ldl_p(buf);
-                    io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
-                    l = 4;
-                } else if (l >= 2 && ((addr & 1) == 0)) {
-                    /* 16 bit write access */
-                    val = lduw_p(buf);
-                    io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
-                    l = 2;
+                if (pd & IO_MEM_REVERSE) {
+                    /* Specific case for reverse endian page write */
+                    addr1 = (pd & TARGET_PAGE_MASK) +
+                        (addr & ~TARGET_PAGE_MASK);
+                    ptr = phys_ram_base + addr1;
+                    for (; l >= 4; l -= 4) {
+                        stlr_p(ptr, *(uint32_t *)buf);
+                        ptr += 4;
+                        buf += 4;
+                    }
+                    for (; l >= 2; l -= 2) {
+                        stwr_p(ptr, *(uint16_t *)buf);
+                        ptr += 2;
+                        buf += 2;
+                    }
+                    if (l >= 1)
+                        *ptr = *buf;
+                    goto invalidate_code;
                 } else {
-                    /* 8 bit write access */
-                    val = ldub_p(buf);
-                    io_mem_write[io_index][0](io_mem_opaque[io_index], addr, val);
-                    l = 1;
+                    io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+                    /* XXX: could force cpu_single_env to NULL to avoid
+                       potential bugs */
+                    if (l >= 4 && ((addr & 3) == 0)) {
+                        /* 32 bit write access */
+                        val = ldul_p(buf);
+                        io_mem_write[io_index][2](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 4;
+                    } else if (l >= 2 && ((addr & 1) == 0)) {
+                        /* 16 bit write access */
+                        val = lduw_p(buf);
+                        io_mem_write[io_index][1](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 2;
+                    } else {
+                        /* 8 bit write access */
+                        val = ldub_p(buf);
+                        io_mem_write[io_index][0](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 1;
+                    }
                 }
             } else {
-                unsigned long addr1;
                 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
                 /* RAM case */
                 ptr = phys_ram_base + addr1;
                 memcpy(ptr, buf, l);
+            invalidate_code:
                 if (!cpu_physical_memory_is_dirty(addr1)) {
                     /* invalidate code */
                     tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
@@ -2560,23 +2583,45 @@ void cpu_physical_memory_rw(target_phys_
         } else {
             if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
                 !(pd & IO_MEM_ROMD)) {
-                /* I/O case */
-                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
-                if (l >= 4 && ((addr & 3) == 0)) {
-                    /* 32 bit read access */
-                    val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
-                    stl_p(buf, val);
-                    l = 4;
-                } else if (l >= 2 && ((addr & 1) == 0)) {
-                    /* 16 bit read access */
-                    val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
-                    stw_p(buf, val);
-                    l = 2;
+                if (pd & IO_MEM_REVERSE) {
+                    /* Specific case for reverse endian page read */
+                    addr1 = (pd & TARGET_PAGE_MASK) +
+                        (addr & ~TARGET_PAGE_MASK);
+                    ptr = phys_ram_base + addr1;
+                    for (; l >= 4; l -= 4) {
+                        *(uint32_t *)buf = ldulr_p(ptr);
+                        ptr += 4;
+                        buf += 4;
+                    }
+                    for (; l >= 2; l -= 2) {
+                        *(uint16_t *)buf = lduwr_p(ptr);
+                        ptr += 2;
+                        buf += 2;
+                    }
+                    if (l >= 1)
+                        *buf = *ptr;
                 } else {
-                    /* 8 bit read access */
-                    val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr);
-                    stb_p(buf, val);
-                    l = 1;
+                    /* I/O case */
+                    io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+                    if (l >= 4 && ((addr & 3) == 0)) {
+                        /* 32 bit read access */
+                        val = io_mem_read[io_index][2](io_mem_opaque[io_index],
+                                                       addr);
+                        stl_p(buf, val);
+                        l = 4;
+                    } else if (l >= 2 && ((addr & 1) == 0)) {
+                        /* 16 bit read access */
+                        val = io_mem_read[io_index][1](io_mem_opaque[io_index],
+                                                       addr);
+                        stw_p(buf, val);
+                        l = 2;
+                    } else {
+                        /* 8 bit read access */
+                        val = io_mem_read[io_index][0](io_mem_opaque[io_index],
+                                                       addr);
+                        stb_p(buf, val);
+                        l = 1;
+                    }
                 }
             } else {
                 /* RAM case */
@@ -2632,7 +2677,7 @@ void cpu_physical_memory_write_rom(targe
 
 
 /* warning: addr must be aligned */
-uint32_t ldl_phys(target_phys_addr_t addr)
+uint32_t ldul_phys(target_phys_addr_t addr)
 {
     int io_index;
     uint8_t *ptr;
@@ -2656,7 +2701,7 @@ uint32_t ldl_phys(target_phys_addr_t add
         /* RAM case */
         ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) +
             (addr & ~TARGET_PAGE_MASK);
-        val = ldl_p(ptr);
+        val = ldul_p(ptr);
     }
     return val;
 }
@@ -2907,6 +2952,7 @@ void dump_exec_info(FILE *f,
 #define env cpu_single_env
 #define SOFTMMU_CODE_ACCESS
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -2919,6 +2965,21 @@ void dump_exec_info(FILE *f,
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 #undef env
 
 #endif
Index: monitor.c
===================================================================
RCS file: /sources/qemu/qemu/monitor.c,v
retrieving revision 1.83
diff -u -d -d -p -r1.83 monitor.c
--- monitor.c	25 Sep 2007 17:28:42 -0000	1.83
+++ monitor.c	13 Oct 2007 22:00:08 -0000
@@ -595,7 +595,7 @@ static void memory_dump(int count, int f
                 v = lduw_raw(buf + i);
                 break;
             case 4:
-                v = (uint32_t)ldl_raw(buf + i);
+                v = (uint32_t)ldul_raw(buf + i);
                 break;
             case 8:
                 v = ldq_raw(buf + i);
Index: softmmu_exec.h
===================================================================
RCS file: /sources/qemu/qemu/softmmu_exec.h,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 softmmu_exec.h
--- softmmu_exec.h	30 Oct 2005 18:16:26 -0000	1.1
+++ softmmu_exec.h	13 Oct 2007 22:00:08 -0000
@@ -1,7 +1,7 @@
 /* Common softmmu definitions and inline routines.  */
 
-#define ldul_user ldl_user
-#define ldul_kernel ldl_kernel
+#define lduq_user ldq_user
+#define lduq_kernel ldq_kernel
 
 #define ACCESS_TYPE 0
 #define MEMSUFFIX _kernel
@@ -56,7 +56,10 @@
 #define ldsb(p) ldsb_data(p)
 #define lduw(p) lduw_data(p)
 #define ldsw(p) ldsw_data(p)
-#define ldl(p) ldl_data(p)
+#define ldul(p) ldul_data(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl(p) ldsl_data(p)
+#endif
 #define ldq(p) ldq_data(p)
 
 #define stb(p, v) stb_data(p, v)
Index: softmmu_header.h
===================================================================
RCS file: /sources/qemu/qemu/softmmu_header.h,v
retrieving revision 1.17
diff -u -d -d -p -r1.17 softmmu_header.h
--- softmmu_header.h	8 Oct 2007 13:16:14 -0000	1.17
+++ softmmu_header.h	13 Oct 2007 22:00:08 -0000
@@ -17,27 +17,86 @@
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
+
+#if !defined(REVERSE_ENDIAN)
+/* native-endian */
+#if defined(TARGET_WORDS_BIGENDIAN)
+#define ESUFFIX _be
+#else
+#define ESUFFIX _le
+#endif
 #if DATA_SIZE == 8
 #define SUFFIX q
 #define USUFFIX q
+#define LSUFFIX q
+#define LUSUFFIX q
 #define DATA_TYPE uint64_t
 #elif DATA_SIZE == 4
 #define SUFFIX l
-#define USUFFIX l
+#define USUFFIX ul
+#define LSUFFIX l
+#define LUSUFFIX ul
+#if (TARGET_LONG_BITS == 64)
+#define DATA_STYPE int32_t
+#endif
 #define DATA_TYPE uint32_t
 #elif DATA_SIZE == 2
 #define SUFFIX w
 #define USUFFIX uw
+#define LSUFFIX w
+#define LUSUFFIX uw
 #define DATA_TYPE uint16_t
 #define DATA_STYPE int16_t
 #elif DATA_SIZE == 1
 #define SUFFIX b
 #define USUFFIX ub
+#define LSUFFIX b
+#define LUSUFFIX ub
+#define DATA_TYPE uint8_t
+#define DATA_STYPE int8_t
+#else
+#error unsupported data size
+#endif
+#else /* !defined(REVERSE_ENDIAN) */
+/* reverse-endian */
+#if defined(TARGET_WORDS_BIGENDIAN)
+#define ESUFFIX _le
+#else
+#define ESUFFIX _be
+#endif
+#if DATA_SIZE == 8
+#define SUFFIX qr
+#define USUFFIX qr
+#define LSUFFIX q
+#define LUSUFFIX q
+#define DATA_TYPE uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX lr
+#define USUFFIX ulr
+#define LSUFFIX l
+#define LUSUFFIX ul
+#if (TARGET_LONG_BITS == 64)
+#define DATA_STYPE int32_t
+#endif
+#define DATA_TYPE uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX wr
+#define USUFFIX uwr
+#define LSUFFIX w
+#define LUSUFFIX uw
+#define DATA_TYPE uint16_t
+#define DATA_STYPE int16_t
+#elif DATA_SIZE == 1
+#define SUFFIX br
+#define USUFFIX ubr
+#define LSUFFIX b
+#define LUSUFFIX ub
 #define DATA_TYPE uint8_t
 #define DATA_STYPE int8_t
 #else
 #error unsupported data size
 #endif
+#endif /* defined(REVERSE_ENDIAN) */
 
 #if ACCESS_TYPE == 0
 
@@ -168,7 +227,7 @@ static inline RES_TYPE glue(glue(ld, USU
     return res;
 }
 
-#if DATA_SIZE <= 2
+#if defined(DATA_STYPE)
 static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
 {
     int res;
@@ -291,7 +350,7 @@ static inline RES_TYPE glue(glue(ld, USU
     return res;
 }
 
-#if DATA_SIZE <= 2
+#if defined(DATA_STYPE)
 static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
 {
     int res, index;
@@ -340,8 +399,29 @@ static inline void glue(glue(st, SUFFIX)
 
 #endif /* !asm */
 
+/* BE/LE access routines */
+static inline RES_TYPE glue(glue(glue(ld, LUSUFFIX), ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(glue(ld, USUFFIX), MEMSUFFIX)(ptr);
+}
+
+#if defined(DATA_STYPE)
+static inline RES_TYPE glue(glue(glue(lds, LSUFFIX), ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(glue(lds, SUFFIX), MEMSUFFIX)(ptr);
+}
+#endif
+
+#if ACCESS_TYPE != 3
+static inline void glue(glue(glue(st, LSUFFIX), ESUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v)
+{
+    glue(glue(st, SUFFIX), MEMSUFFIX)(ptr, v);
+}
+#endif
+
 #if ACCESS_TYPE != 3
 
+#if !defined(REVERSE_ENDIAN)
 #if DATA_SIZE == 8
 static inline float64 glue(ldfq, MEMSUFFIX)(target_ulong ptr)
 {
@@ -353,6 +433,11 @@ static inline float64 glue(ldfq, MEMSUFF
     return u.d;
 }
 
+static inline float64 glue(glue(ldfq, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldfq, MEMSUFFIX)(ptr);
+}
+
 static inline void glue(stfq, MEMSUFFIX)(target_ulong ptr, float64 v)
 {
     union {
@@ -362,6 +447,12 @@ static inline void glue(stfq, MEMSUFFIX)
     u.d = v;
     glue(stq, MEMSUFFIX)(ptr, u.i);
 }
+
+static inline void glue(glue(stfq, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                        float64 v)
+{
+    glue(stfq, MEMSUFFIX)(ptr, v);
+}
 #endif /* DATA_SIZE == 8 */
 
 #if DATA_SIZE == 4
@@ -371,10 +462,15 @@ static inline float32 glue(ldfl, MEMSUFF
         float32 f;
         uint32_t i;
     } u;
-    u.i = glue(ldl, MEMSUFFIX)(ptr);
+    u.i = glue(ldul, MEMSUFFIX)(ptr);
     return u.f;
 }
 
+static inline float32 glue(glue(ldfl, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldfl, MEMSUFFIX)(ptr);
+}
+
 static inline void glue(stfl, MEMSUFFIX)(target_ulong ptr, float32 v)
 {
     union {
@@ -384,8 +480,84 @@ static inline void glue(stfl, MEMSUFFIX)
     u.f = v;
     glue(stl, MEMSUFFIX)(ptr, u.i);
 }
+
+static inline void glue(glue(stfl, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                        float32 v)
+{
+    glue(stfl, MEMSUFFIX)(ptr, v);
+}
 #endif /* DATA_SIZE == 4 */
 
+#else /* defined(REVERSE_ENDIAN) */
+
+#if DATA_SIZE == 8
+static inline float64 glue(ldfqr, MEMSUFFIX)(target_ulong ptr)
+{
+    union {
+        float64 d;
+        uint64_t i;
+    } u;
+    u.i = glue(ldqr, MEMSUFFIX)(ptr);
+    return u.d;
+}
+
+static inline float64 glue(glue(ldfqr, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldfqr, MEMSUFFIX)(ptr);
+}
+
+static inline void glue(stfqr, MEMSUFFIX)(target_ulong ptr, float64 v)
+{
+    union {
+        float64 d;
+        uint64_t i;
+    } u;
+    u.d = v;
+    glue(stqr, MEMSUFFIX)(ptr, u.i);
+}
+
+static inline void glue(glue(stfqr, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                         float64 v)
+{
+    glue(stfqr, MEMSUFFIX)(ptr, v);
+}
+#endif /* DATA_SIZE == 8 */
+
+#if DATA_SIZE == 4
+static inline float32 glue(ldflr, MEMSUFFIX)(target_ulong ptr)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.i = glue(ldulr, MEMSUFFIX)(ptr);
+    return u.f;
+}
+
+static inline float32 glue(glue(ldflr, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldflr, MEMSUFFIX)(ptr);
+}
+
+static inline void glue(stflr, MEMSUFFIX)(target_ulong ptr, float32 v)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.f = v;
+    glue(stlr, MEMSUFFIX)(ptr, u.i);
+}
+
+static inline void glue(glue(stflr, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                         float32 v)
+{
+    glue(stflr, MEMSUFFIX)(ptr, v);
+}
+#endif /* DATA_SIZE == 4 */
+
+#endif /* defined(REVERSE_ENDIAN) */
+
 #endif /* ACCESS_TYPE != 3 */
 
 #undef RES_TYPE
@@ -393,7 +565,10 @@ static inline void glue(stfl, MEMSUFFIX)
 #undef DATA_STYPE
 #undef SUFFIX
 #undef USUFFIX
+#undef LSUFFIX
+#undef LUSUFFIX
 #undef DATA_SIZE
 #undef CPU_MEM_INDEX
 #undef MMUSUFFIX
+#undef ESUFFIX
 #undef ADDR_READ
Index: softmmu_template.h
===================================================================
RCS file: /sources/qemu/qemu/softmmu_template.h,v
retrieving revision 1.18
diff -u -d -d -p -r1.18 softmmu_template.h
--- softmmu_template.h	17 Sep 2007 08:09:45 -0000	1.18
+++ softmmu_template.h	13 Oct 2007 22:00:08 -0000
@@ -19,25 +19,66 @@
  */
 #define DATA_SIZE (1 << SHIFT)
 
+#if !defined(REVERSE_ENDIAN)
+/* native-endian */
 #if DATA_SIZE == 8
 #define SUFFIX q
 #define USUFFIX q
+#define RSUFFIX qr
+#define URSUFFIX qr
 #define DATA_TYPE uint64_t
 #elif DATA_SIZE == 4
 #define SUFFIX l
-#define USUFFIX l
+#define USUFFIX ul
+#define RSUFFIX lr
+#define URSUFFIX ulr
 #define DATA_TYPE uint32_t
 #elif DATA_SIZE == 2
 #define SUFFIX w
 #define USUFFIX uw
+#define RSUFFIX wr
+#define URSUFFIX uwr
 #define DATA_TYPE uint16_t
 #elif DATA_SIZE == 1
 #define SUFFIX b
 #define USUFFIX ub
+#define RSUFFIX br
+#define URSUFFIX ubr
 #define DATA_TYPE uint8_t
 #else
 #error unsupported data size
 #endif
+#else /* !defined(REVERSE_ENDIAN) */
+/* reverse-endian */
+#if DATA_SIZE == 8
+#define SUFFIX qr
+#define USUFFIX qr
+#define RSUFFIX q
+#define URSUFFIX q
+#define DATA_TYPE uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX lr
+#define USUFFIX ulr
+#define RSUFFIX l
+#define URSUFFIX ul
+#define DATA_TYPE uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX wr
+#define USUFFIX uwr
+#define RSUFFIX w
+#define URSUFFIX uw
+#define DATA_TYPE uint16_t
+#elif DATA_SIZE == 1
+#define SUFFIX br
+#define USUFFIX ubr
+#define RSUFFIX b
+#define URSUFFIX ub
+#define DATA_TYPE uint8_t
+#else
+#error unsupported data size
+#endif
+#endif /* defined(REVERSE_ENDIAN) */
+
 
 #ifdef SOFTMMU_CODE_ACCESS
 #define READ_ACCESS_TYPE 2
@@ -47,25 +88,62 @@
 #define ADDR_READ addr_read
 #endif
 
+#if (defined(TARGET_WORDS_BIGENDIAN) && !defined(REVERSE_ENDIAN)) || \
+    (!defined(TARGET_WORDS_BIGENDIAN) && defined(REVERSE_ENDIAN))
+#define ACCESS_WORDS_BIGENDIAN
+#endif
+
+/* Beware: we do not have reverse-endian accessors for IOs */
+#if defined(REVERSE_ENDIAN)
+#define DO_IOSWAP 1
+#else
+#define DO_IOSWAP 0
+#endif
+#if SHIFT == 1
+#define IOSWAP(val) bswap16(val)
+#elif SHIFT >= 2
+#define IOSWAP(val) bswap32(val)
+#else
+#define IOSWAP(val) (val)
+#endif
+
 static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
                                                         int is_user,
                                                         void *retaddr);
 static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
-                                              target_ulong tlb_addr)
+                                              target_ulong tlb_addr,
+                                              int do_ioswap)
 {
     DATA_TYPE res;
+#if SHIFT > 2
+    uint32_t tmp;
+#endif
     int index;
 
     index = (tlb_addr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
 #if SHIFT <= 2
     res = io_mem_read[index][SHIFT](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        res = IOSWAP(res);
 #else
-#ifdef TARGET_WORDS_BIGENDIAN
-    res = (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr) << 32;
-    res |= io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+#ifdef ACCESS_WORDS_BIGENDIAN
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res = (uint64_t)tmp << 32;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res |= tmp;
 #else
-    res = io_mem_read[index][2](io_mem_opaque[index], physaddr);
-    res |= (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr + 4) << 32;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res = tmp;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res |= (uint64_t)tmp << 32;
 #endif
 #endif /* SHIFT > 2 */
 #ifdef USE_KQEMU
@@ -92,10 +170,34 @@ DATA_TYPE REGPARM(1) glue(glue(__ld, SUF
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[is_user][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO read */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 1);
+                } else {
+                    /* Specific case for reverse endian page read */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages or IO) */
+                        goto do_unaligned_access;
+                    }
+#ifdef ALIGNED_ONLY
+                    if ((addr & (DATA_SIZE - 1)) != 0) {
+                        retaddr = GETPC();
+                        do_unaligned_access(addr, READ_ACCESS_TYPE,
+                                            is_user, retaddr);
+                    }
+#endif
+                    res = glue(glue(ld, URSUFFIX), _raw)((uint8_t *)(long)physaddr);
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
             /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
@@ -144,10 +246,45 @@ static DATA_TYPE glue(glue(slow_ld, SUFF
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[is_user][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO read */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 1);
+                } else {
+                    /* Specific case for reverse endian page read */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages) */
+                        addr1 = addr & ~(DATA_SIZE - 1);
+                        addr2 = addr1 + DATA_SIZE;
+                        res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr1,
+                                                                      is_user,
+                                                                      retaddr);
+                        res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
+                                                                      is_user,
+                                                                      retaddr);
+                        shift = (addr & (DATA_SIZE - 1)) * 8;
+#ifdef ACCESS_WORDS_BIGENDIAN
+                        res = (res1 >> shift) |
+                            (res2 << ((DATA_SIZE * 8) - shift));
+#else
+                        res = (res1 << shift) |
+                            (res2 >> ((DATA_SIZE * 8) - shift));
+#endif
+                        res = (DATA_TYPE)res;
+                    } else {
+                        /* unaligned/aligned access in the same page */
+                        res = glue(glue(ld, URSUFFIX), _raw)((uint8_t *)(long)physaddr);
+                    }
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* slow unaligned access (it spans two pages) */
@@ -158,7 +295,7 @@ static DATA_TYPE glue(glue(slow_ld, SUFF
             res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
                                                           is_user, retaddr);
             shift = (addr & (DATA_SIZE - 1)) * 8;
-#ifdef TARGET_WORDS_BIGENDIAN
+#ifdef ACCESS_WORDS_BIGENDIAN
             res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
 #else
             res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift));
@@ -186,22 +323,39 @@ static void glue(glue(slow_st, SUFFIX), 
 static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr,
                                           DATA_TYPE val,
                                           target_ulong tlb_addr,
-                                          void *retaddr)
+                                          void *retaddr, int do_ioswap)
 {
+#if SHIFT > 2
+    uint32_t tmp;
+#endif
     int index;
 
     index = (tlb_addr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
     env->mem_write_vaddr = tlb_addr;
     env->mem_write_pc = (unsigned long)retaddr;
 #if SHIFT <= 2
+    if (do_ioswap != DO_IOSWAP)
+        val = IOSWAP(val);
     io_mem_write[index][SHIFT](io_mem_opaque[index], physaddr, val);
 #else
-#ifdef TARGET_WORDS_BIGENDIAN
-    io_mem_write[index][2](io_mem_opaque[index], physaddr, val >> 32);
-    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val);
+#ifdef ACCESS_WORDS_BIGENDIAN
+    tmp = val >> 32;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr, tmp);
+    tmp = val;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, tmp);
 #else
-    io_mem_write[index][2](io_mem_opaque[index], physaddr, val);
-    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val >> 32);
+    tmp = val;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr, tmp);
+    tmp = val >> 32;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, tmp);
 #endif
 #endif /* SHIFT > 2 */
 #ifdef USE_KQEMU
@@ -224,12 +378,37 @@ void REGPARM(2) glue(glue(__st, SUFFIX),
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[is_user][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            retaddr = GETPC();
-            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO write */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    retaddr = GETPC();
+                    glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr,
+                                           1);
+                } else {
+                    /* Specific case for reverse endian page write */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        goto do_unaligned_access;
+                    }
+#ifdef ALIGNED_ONLY
+                    if ((addr & (DATA_SIZE - 1)) != 0) {
+                        retaddr = GETPC();
+                        do_unaligned_access(addr, 1, is_user, retaddr);
+                    }
+#endif
+                    glue(glue(st, RSUFFIX), _raw)((uint8_t *)(long)physaddr, val);
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                retaddr = GETPC();
+                glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
+            /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
@@ -275,15 +454,48 @@ static void glue(glue(slow_st, SUFFIX), 
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[is_user][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO write */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr,
+                                           1);
+                } else {
+                    /* Specific case for reverse endian page write */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages) */
+                        /* XXX: not efficient, but simple */
+                        for(i = 0;i < DATA_SIZE; i++) {
+#ifdef ACCESS_WORDS_BIGENDIAN
+                            glue(slow_stb, MMUSUFFIX)(addr + i, val >> (i * 8),
+                                                      is_user, retaddr);
+#else
+                            glue(slow_stb, MMUSUFFIX)(addr + i,
+                                                      val >> (((DATA_SIZE - 1) * 8)
+                                                              - (i * 8)),
+                                                      is_user, retaddr);
+#endif
+                        }
+
+                    } else {
+                        /* aligned/unaligned access in the same page */
+                        glue(glue(st, RSUFFIX), _raw)((uint8_t *)(long)physaddr,
+                                                      val);
+                    }
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* XXX: not efficient, but simple */
             for(i = 0;i < DATA_SIZE; i++) {
-#ifdef TARGET_WORDS_BIGENDIAN
+#ifdef ACCESS_WORDS_BIGENDIAN
                 glue(slow_stb, MMUSUFFIX)(addr + i, val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
                                           is_user, retaddr);
 #else
@@ -304,10 +516,15 @@ static void glue(glue(slow_st, SUFFIX), 
 
 #endif /* !defined(SOFTMMU_CODE_ACCESS) */
 
+#undef DO_IOSWAP
+#undef IOSWAP
+#undef ACCESS_WORDS_BIGENDIAN
 #undef READ_ACCESS_TYPE
 #undef SHIFT
 #undef DATA_TYPE
 #undef SUFFIX
 #undef USUFFIX
+#undef RSUFFIX
+#undef URSUFFIX
 #undef DATA_SIZE
 #undef ADDR_READ
Index: hw/eepro100.c
===================================================================
RCS file: /sources/qemu/qemu/hw/eepro100.c,v
retrieving revision 1.6
diff -u -d -d -p -r1.6 eepro100.c
--- hw/eepro100.c	16 Sep 2007 21:07:52 -0000	1.6
+++ hw/eepro100.c	13 Oct 2007 22:00:08 -0000
@@ -723,7 +723,7 @@ static void eepro100_cu_command(EEPRO100
             uint32_t tbd_address = cb_address + 0x10;
             assert(tcb_bytes <= sizeof(buf));
             while (size < tcb_bytes) {
-                uint32_t tx_buffer_address = ldl_phys(tbd_address);
+                uint32_t tx_buffer_address = ldul_phys(tbd_address);
                 uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
                 //~ uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
                 tbd_address += 8;
@@ -743,7 +743,7 @@ static void eepro100_cu_command(EEPRO100
                     /* Extended TCB. */
                     assert(tcb_bytes == 0);
                     for (; tbd_count < 2; tbd_count++) {
-                        uint32_t tx_buffer_address = ldl_phys(tbd_address);
+                        uint32_t tx_buffer_address = ldul_phys(tbd_address);
                         uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
                         uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
                         tbd_address += 8;
@@ -760,7 +760,7 @@ static void eepro100_cu_command(EEPRO100
                 }
                 tbd_address = tbd_array;
                 for (; tbd_count < tx.tbd_count; tbd_count++) {
-                    uint32_t tx_buffer_address = ldl_phys(tbd_address);
+                    uint32_t tx_buffer_address = ldul_phys(tbd_address);
                     uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
                     uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
                     tbd_address += 8;
Index: hw/pc.c
===================================================================
RCS file: /sources/qemu/qemu/hw/pc.c,v
retrieving revision 1.87
diff -u -d -d -p -r1.87 pc.c
--- hw/pc.c	9 Oct 2007 03:08:56 -0000	1.87
+++ hw/pc.c	13 Oct 2007 22:00:09 -0000
@@ -477,8 +477,8 @@ static void load_linux(const char *kerne
     }
 
     /* kernel protocol version */
-    fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202));
-    if (ldl_p(header+0x202) == 0x53726448)
+    fprintf(stderr, "header magic: %#x\n", ldul_p(header+0x202));
+    if (ldul_p(header+0x202) == 0x53726448)
 	protocol = lduw_p(header+0x206);
     else
 	protocol = 0;
@@ -510,7 +510,7 @@ static void load_linux(const char *kerne
 
     /* highest address for loading the initrd */
     if (protocol >= 0x203)
-	initrd_max = ldl_p(header+0x22c);
+	initrd_max = ldul_p(header+0x22c);
     else
 	initrd_max = 0x37ffffff;
 
Index: hw/pl080.c
===================================================================
RCS file: /sources/qemu/qemu/hw/pl080.c,v
retrieving revision 1.5
diff -u -d -d -p -r1.5 pl080.c
--- hw/pl080.c	16 Sep 2007 21:07:55 -0000	1.5
+++ hw/pl080.c	13 Oct 2007 22:00:09 -0000
@@ -162,10 +162,10 @@ again:
             if (size == 0) {
                 /* Transfer complete.  */
                 if (ch->lli) {
-                    ch->src = ldl_phys(ch->lli);
-                    ch->dest = ldl_phys(ch->lli + 4);
-                    ch->ctrl = ldl_phys(ch->lli + 12);
-                    ch->lli = ldl_phys(ch->lli + 8);
+                    ch->src = ldul_phys(ch->lli);
+                    ch->dest = ldul_phys(ch->lli + 4);
+                    ch->ctrl = ldul_phys(ch->lli + 12);
+                    ch->lli = ldul_phys(ch->lli + 8);
                 } else {
                     ch->conf &= ~PL080_CCONF_E;
                 }
Index: hw/sun4m.c
===================================================================
RCS file: /sources/qemu/qemu/hw/sun4m.c,v
retrieving revision 1.55
diff -u -d -d -p -r1.55 sun4m.c
--- hw/sun4m.c	6 Oct 2007 11:28:21 -0000	1.55
+++ hw/sun4m.c	13 Oct 2007 22:00:09 -0000
@@ -465,7 +465,7 @@ static void sun4m_load_kernel(long vram_
         }
         if (initrd_size > 0) {
             for (i = 0; i < 64 * TARGET_PAGE_SIZE; i += TARGET_PAGE_SIZE) {
-                if (ldl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
+                if (ldul_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
                     == 0x48647253) { // HdrS
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 16, INITRD_LOAD_ADDR);
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 20, initrd_size);
Index: hw/sun4u.c
===================================================================
RCS file: /sources/qemu/qemu/hw/sun4u.c,v
retrieving revision 1.22
diff -u -d -d -p -r1.22 sun4u.c
--- hw/sun4u.c	6 Oct 2007 11:28:21 -0000	1.22
+++ hw/sun4u.c	13 Oct 2007 22:00:09 -0000
@@ -418,7 +418,7 @@ static void sun4u_init(int ram_size, int
         }
         if (initrd_size > 0) {
             for (i = 0; i < 64 * TARGET_PAGE_SIZE; i += TARGET_PAGE_SIZE) {
-                if (ldl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
+                if (ldul_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
                     == 0x48647253) { // HdrS
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 16, INITRD_LOAD_ADDR);
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 20, initrd_size);
Index: linux-user/elfload.c
===================================================================
RCS file: /sources/qemu/qemu/linux-user/elfload.c,v
retrieving revision 1.51
diff -u -d -d -p -r1.51 elfload.c
--- linux-user/elfload.c	9 Oct 2007 16:34:29 -0000	1.51
+++ linux-user/elfload.c	13 Oct 2007 22:00:09 -0000
@@ -336,7 +336,7 @@ static inline void init_thread(struct ta
     pos += sizeof(target_ulong);
     _regs->gpr[4] = pos;
     for (tmp = 1; tmp != 0; pos += sizeof(target_ulong))
-        tmp = ldl(pos);
+        tmp = ldul(pos);
     _regs->gpr[5] = pos;
 }
 
Index: linux-user/qemu.h
===================================================================
RCS file: /sources/qemu/qemu/linux-user/qemu.h,v
retrieving revision 1.40
diff -u -d -d -p -r1.40 qemu.h
--- linux-user/qemu.h	9 Oct 2007 16:34:29 -0000	1.40
+++ linux-user/qemu.h	13 Oct 2007 22:00:09 -0000
@@ -313,7 +313,7 @@ static inline void *lock_user_string(tar
 #define tput8(addr, val) stb(addr, val)
 #define tget16(addr) lduw(addr)
 #define tput16(addr, val) stw(addr, val)
-#define tget32(addr) ldl(addr)
+#define tget32(addr) ldul(addr)
 #define tput32(addr, val) stl(addr, val)
 #define tget64(addr) ldq(addr)
 #define tput64(addr, val) stq(addr, val)
@@ -321,7 +321,7 @@ static inline void *lock_user_string(tar
 #define tgetl(addr) ldq(addr)
 #define tputl(addr, val) stq(addr, val)
 #else
-#define tgetl(addr) ldl(addr)
+#define tgetl(addr) ldul(addr)
 #define tputl(addr, val) stl(addr, val)
 #endif
 
Index: linux-user/signal.c
===================================================================
RCS file: /sources/qemu/qemu/linux-user/signal.c,v
retrieving revision 1.45
diff -u -d -d -p -r1.45 signal.c
--- linux-user/signal.c	5 Oct 2007 17:01:51 -0000	1.45
+++ linux-user/signal.c	13 Oct 2007 22:00:09 -0000
@@ -878,28 +878,28 @@ restore_sigcontext(CPUX86State *env, str
         cpu_x86_load_seg(env, R_ES, lduw(&sc->es));
         cpu_x86_load_seg(env, R_DS, lduw(&sc->ds));
 
-        env->regs[R_EDI] = ldl(&sc->edi);
-        env->regs[R_ESI] = ldl(&sc->esi);
-        env->regs[R_EBP] = ldl(&sc->ebp);
-        env->regs[R_ESP] = ldl(&sc->esp);
-        env->regs[R_EBX] = ldl(&sc->ebx);
-        env->regs[R_EDX] = ldl(&sc->edx);
-        env->regs[R_ECX] = ldl(&sc->ecx);
-        env->eip = ldl(&sc->eip);
+        env->regs[R_EDI] = ldul(&sc->edi);
+        env->regs[R_ESI] = ldul(&sc->esi);
+        env->regs[R_EBP] = ldul(&sc->ebp);
+        env->regs[R_ESP] = ldul(&sc->esp);
+        env->regs[R_EBX] = ldul(&sc->ebx);
+        env->regs[R_EDX] = ldul(&sc->edx);
+        env->regs[R_ECX] = ldul(&sc->ecx);
+        env->eip = ldul(&sc->eip);
 
         cpu_x86_load_seg(env, R_CS, lduw(&sc->cs) | 3);
         cpu_x86_load_seg(env, R_SS, lduw(&sc->ss) | 3);
 
 	{
 		unsigned int tmpflags;
-                tmpflags = ldl(&sc->eflags);
+                tmpflags = ldul(&sc->eflags);
 		env->eflags = (env->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
                 //		regs->orig_eax = -1;		/* disable syscall checks */
 	}
 
 	{
 		struct _fpstate * buf;
-                buf = (void *)ldl(&sc->fpstate);
+                buf = (void *)ldul(&sc->fpstate);
 		if (buf) {
 #if 0
 			if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
@@ -909,7 +909,7 @@ restore_sigcontext(CPUX86State *env, str
 		}
 	}
 
-        *peax = ldl(&sc->eax);
+        *peax = ldul(&sc->eax);
 	return err;
 #if 0
 badframe:
Index: linux-user/vm86.c
===================================================================
RCS file: /sources/qemu/qemu/linux-user/vm86.c,v
retrieving revision 1.11
diff -u -d -d -p -r1.11 vm86.c
--- linux-user/vm86.c	17 Sep 2007 08:09:50 -0000	1.11
+++ linux-user/vm86.c	13 Oct 2007 22:00:09 -0000
@@ -56,7 +56,7 @@ static inline unsigned int vm_getw(uint8
 
 static inline unsigned int vm_getl(uint8_t *segptr, unsigned int reg16)
 {
-    return ldl(segptr + (reg16 & 0xffff));
+    return ldul(segptr + (reg16 & 0xffff));
 }
 
 void save_v86_state(CPUX86State *env)
Index: target-alpha/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/exec.h,v
retrieving revision 1.3
diff -u -d -d -p -r1.3 exec.h
--- target-alpha/exec.h	16 Sep 2007 21:08:01 -0000	1.3
+++ target-alpha/exec.h	13 Oct 2007 22:00:09 -0000
@@ -62,6 +62,9 @@ register uint64_t T2 asm(AREG3);
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 static inline void env_to_regs(void)
Index: target-alpha/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/helper.c,v
retrieving revision 1.4
diff -u -d -d -p -r1.4 helper.c
--- target-alpha/helper.c	17 Sep 2007 08:09:51 -0000	1.4
+++ target-alpha/helper.c	13 Oct 2007 22:00:09 -0000
@@ -69,7 +69,7 @@ int cpu_alpha_handle_mmu_fault (CPUState
             env->exception_index = EXCP_DTB_MISS_PAL;
         else
             env->exception_index = EXCP_DTB_MISS_NATIVE;
-        opc = (ldl_code(env->pc) >> 21) << 4;
+        opc = (ldul_code(env->pc) >> 21) << 4;
         if (rw) {
             opc |= 0x9;
         } else {
Index: target-alpha/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/op_helper.c,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 op_helper.c
--- target-alpha/op_helper.c	16 Sep 2007 21:08:01 -0000	1.2
+++ target-alpha/op_helper.c	13 Oct 2007 22:00:09 -0000
@@ -1207,6 +1207,21 @@ void helper_st_phys_to_virt (void)
 
 #define MMUSUFFIX _mmu
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -1218,6 +1233,7 @@ void helper_st_phys_to_virt (void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-alpha/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/op_mem.h,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 op_mem.h
--- target-alpha/op_mem.h	16 Sep 2007 21:08:01 -0000	1.2
+++ target-alpha/op_mem.h	13 Oct 2007 22:00:09 -0000
@@ -30,7 +30,7 @@ static inline uint32_t glue(ldl_l, MEMSU
 {
     env->lock = EA;
 
-    return glue(ldl, MEMSUFFIX)(EA);
+    return glue(ldul, MEMSUFFIX)(EA);
 }
 
 static inline uint32_t glue(ldq_l, MEMSUFFIX) (target_ulong EA)
@@ -82,7 +82,7 @@ ALPHA_LD_OP(bu, ldub);
 ALPHA_ST_OP(b, stb);
 ALPHA_LD_OP(wu, lduw);
 ALPHA_ST_OP(w, stw);
-ALPHA_LD_OP(l, ldl);
+ALPHA_LD_OP(l, ldul);
 ALPHA_ST_OP(l, stl);
 ALPHA_LD_OP(q, ldq);
 ALPHA_ST_OP(q, stq);
Index: target-alpha/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/translate.c,v
retrieving revision 1.5
diff -u -d -d -p -r1.5 translate.c
--- target-alpha/translate.c	16 Sep 2007 21:08:01 -0000	1.5
+++ target-alpha/translate.c	13 Oct 2007 22:00:09 -0000
@@ -2010,7 +2010,7 @@ int gen_intermediate_code_internal (CPUS
                     ctx.pc, ctx.mem_idx);
         }
 #endif
-        insn = ldl_code(ctx.pc);
+        insn = ldul_code(ctx.pc);
 #if defined ALPHA_DEBUG_DISAS
         insn_count++;
         if (logfile != NULL) {
Index: target-arm/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-arm/exec.h,v
retrieving revision 1.13
diff -u -d -d -p -r1.13 exec.h
--- target-arm/exec.h	16 Sep 2007 21:08:01 -0000	1.13
+++ target-arm/exec.h	13 Oct 2007 22:00:09 -0000
@@ -64,6 +64,9 @@ static inline int cpu_halted(CPUState *e
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 /* In op_helper.c */
Index: target-arm/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-arm/helper.c,v
retrieving revision 1.22
diff -u -d -d -p -r1.22 helper.c
--- target-arm/helper.c	12 Oct 2007 06:47:46 -0000	1.22
+++ target-arm/helper.c	13 Oct 2007 22:00:10 -0000
@@ -297,7 +297,7 @@ void do_interrupt(CPUARMState *env)
             if (env->thumb) {
                 mask = lduw_code(env->regs[15] - 2) & 0xff;
             } else {
-                mask = ldl_code(env->regs[15] - 4) & 0xffffff;
+                mask = ldul_code(env->regs[15] - 4) & 0xffffff;
             }
             /* Only intercept calls from privileged modes, to provide some
                semblance of security.  */
@@ -473,7 +473,7 @@ static int get_phys_addr(CPUState *env, 
         /* Pagetable walk.  */
         /* Lookup l1 descriptor.  */
         table = (env->cp15.c2_base & 0xffffc000) | ((address >> 18) & 0x3ffc);
-        desc = ldl_phys(table);
+        desc = ldul_phys(table);
         type = (desc & 3);
         domain = (env->cp15.c3 >> ((desc >> 4) & 0x1e)) & 3;
         if (type == 0) {
@@ -502,7 +502,7 @@ static int get_phys_addr(CPUState *env, 
                 /* Fine pagetable.  */
                 table = (desc & 0xfffff000) | ((address >> 8) & 0xffc);
             }
-            desc = ldl_phys(table);
+            desc = ldul_phys(table);
             switch (desc & 3) {
             case 0: /* Page translation fault.  */
                 code = 7;
Index: target-arm/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-arm/op_helper.c,v
retrieving revision 1.6
diff -u -d -d -p -r1.6 op_helper.c
--- target-arm/op_helper.c	16 Sep 2007 21:08:02 -0000	1.6
+++ target-arm/op_helper.c	13 Oct 2007 22:00:10 -0000
@@ -180,6 +180,21 @@ void do_vfp_get_fpscr(void)
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -191,6 +206,7 @@ void do_vfp_get_fpscr(void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-arm/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-arm/op_mem.h,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 op_mem.h
--- target-arm/op_mem.h	30 Apr 2007 02:02:16 -0000	1.2
+++ target-arm/op_mem.h	13 Oct 2007 22:00:10 -0000
@@ -1,18 +1,17 @@
 /* ARM memory operations.  */
 
-/* Load from address T1 into T0.  */
-#define MEM_LD_OP(name) \
+#define MEM_LD_OP(name, lname) \
 void OPPROTO glue(op_ld##name,MEMSUFFIX)(void) \
 { \
-    T0 = glue(ld##name,MEMSUFFIX)(T1); \
+    T0 = glue(ld##lname,MEMSUFFIX)(T1); \
     FORCE_RET(); \
 }
 
-MEM_LD_OP(ub)
-MEM_LD_OP(sb)
-MEM_LD_OP(uw)
-MEM_LD_OP(sw)
-MEM_LD_OP(l)
+MEM_LD_OP(ub,ub)
+MEM_LD_OP(sb,sb)
+MEM_LD_OP(uw,uw)
+MEM_LD_OP(sw,sw)
+MEM_LD_OP(l,ul)
 
 #undef MEM_LD_OP
 
@@ -45,7 +44,7 @@ void OPPROTO glue(op_swp##name,MEMSUFFIX
 }
 
 MEM_SWP_OP(b, ub)
-MEM_SWP_OP(l, l)
+MEM_SWP_OP(l, ul)
 
 #undef MEM_SWP_OP
 
@@ -82,7 +81,7 @@ void OPPROTO glue(op_iwmmxt_st##name,MEM
 
 MMX_MEM_OP(b, ub)
 MMX_MEM_OP(w, uw)
-MMX_MEM_OP(l, l)
+MMX_MEM_OP(l, ul)
 MMX_MEM_OP(q, q)
 
 #undef MMX_MEM_OP
Index: target-arm/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-arm/translate.c,v
retrieving revision 1.57
diff -u -d -d -p -r1.57 translate.c
--- target-arm/translate.c	17 Sep 2007 08:09:51 -0000	1.57
+++ target-arm/translate.c	13 Oct 2007 22:00:10 -0000
@@ -2206,7 +2206,7 @@ static void disas_arm_insn(CPUState * en
 {
     unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
 
-    insn = ldl_code(s->pc);
+    insn = ldul_code(s->pc);
     s->pc += 4;
 
     cond = insn >> 28;
Index: target-cris/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-cris/exec.h,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 exec.h
--- target-cris/exec.h	8 Oct 2007 13:04:02 -0000	1.1
+++ target-cris/exec.h	13 Oct 2007 22:00:10 -0000
@@ -50,6 +50,9 @@ void tlb_fill (target_ulong addr, int is
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 void cpu_cris_flush_flags(CPUCRISState *env, int cc_op);
Index: target-cris/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-cris/helper.c,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 helper.c
--- target-cris/helper.c	8 Oct 2007 13:04:02 -0000	1.1
+++ target-cris/helper.c	13 Oct 2007 22:00:10 -0000
@@ -106,7 +106,7 @@ void do_interrupt(CPUState *env)
 //			printf ("BREAK! %d\n", env->trapnr);
 			irqnum = env->trapnr;
 			ebp = env->pregs[SR_EBP];
-			isr = ldl_code(ebp + irqnum * 4);
+			isr = ldul_code(ebp + irqnum * 4);
 			env->pregs[SR_ERP] = env->pc + 2;
 			env->pc = isr;
 
@@ -117,7 +117,7 @@ void do_interrupt(CPUState *env)
 //			printf ("MMU miss\n");
 			irqnum = 4;
 			ebp = env->pregs[SR_EBP];
-			isr = ldl_code(ebp + irqnum * 4);
+			isr = ldul_code(ebp + irqnum * 4);
 			env->pregs[SR_ERP] = env->pc;
 			env->pc = isr;
 			cris_shift_ccs(env);
@@ -138,7 +138,7 @@ void do_interrupt(CPUState *env)
 					__builtin_clz(env->pending_interrupts);
 				irqnum += 0x30;
 				ebp = env->pregs[SR_EBP];
-				isr = ldl_code(ebp + irqnum * 4);
+				isr = ldul_code(ebp + irqnum * 4);
 				env->pregs[SR_ERP] = env->pc;
 				env->pc = isr;
 
Index: target-cris/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-cris/op_helper.c,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 op_helper.c
--- target-cris/op_helper.c	8 Oct 2007 13:04:02 -0000	1.1
+++ target-cris/op_helper.c	13 Oct 2007 22:00:10 -0000
@@ -25,6 +25,21 @@
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -36,6 +51,7 @@
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-cris/op_mem.c
===================================================================
RCS file: /sources/qemu/qemu/target-cris/op_mem.c,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 op_mem.c
--- target-cris/op_mem.c	8 Oct 2007 13:04:02 -0000	1.1
+++ target-cris/op_mem.c	13 Oct 2007 22:00:10 -0000
@@ -49,7 +49,7 @@ void glue(op_stw_T0_T1, MEMSUFFIX) (void
 }
 
 void glue(op_ldl_T0_T0, MEMSUFFIX) (void) {
-    T0 = glue(ldl, MEMSUFFIX) (T0);
+    T0 = glue(ldul, MEMSUFFIX) (T0);
     RETURN();
 }
 
Index: target-cris/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-cris/translate.c,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 translate.c
--- target-cris/translate.c	8 Oct 2007 12:49:08 -0000	1.1
+++ target-cris/translate.c	13 Oct 2007 22:00:10 -0000
@@ -828,7 +828,7 @@ static int dec_prep_alu_m(DisasContext *
 		if (memsize == 1)
 			insn_len++;
 
-		imm = ldl_code(dc->pc + 2);
+		imm = ldul_code(dc->pc + 2);
 		if (memsize != 4) {
 			if (s_ext) {
 				imm = sign_extend(imm, (memsize * 8) - 1);
@@ -1962,7 +1962,7 @@ static unsigned int dec_lapc_im(DisasCon
 	rd = dc->op2;
 
 	cris_cc_mask(dc, 0);
-	imm = ldl_code(dc->pc + 2);
+	imm = ldul_code(dc->pc + 2);
 	DIS(fprintf (logfile, "lapc 0x%x, $r%u\n", imm + dc->pc, dc->op2));
 	gen_op_movl_T0_im (dc->pc + imm);
 	gen_movl_reg_T0[rd] ();
@@ -1999,7 +1999,7 @@ static unsigned int dec_jas_im(DisasCont
 {
 	uint32_t imm;
 
-	imm = ldl_code(dc->pc + 2);
+	imm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "jas 0x%x\n", imm));
 	cris_cc_mask(dc, 0);
@@ -2016,7 +2016,7 @@ static unsigned int dec_jasc_im(DisasCon
 {
 	uint32_t imm;
 
-	imm = ldl_code(dc->pc + 2);
+	imm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "jasc 0x%x\n", imm));
 	cris_cc_mask(dc, 0);
@@ -2047,7 +2047,7 @@ static unsigned int dec_bcc_im(DisasCont
 	int32_t offset;
 	uint32_t cond = dc->op2;
 
-	offset = ldl_code(dc->pc + 2);
+	offset = ldul_code(dc->pc + 2);
 	offset = sign_extend(offset, 15);
 
 	DIS(fprintf (logfile, "b%s %d pc=%x dst=%x\n",
@@ -2065,7 +2065,7 @@ static unsigned int dec_bas_im(DisasCont
 	int32_t simm;
 
 
-	simm = ldl_code(dc->pc + 2);
+	simm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "bas 0x%x, $p%u\n", dc->pc + simm, dc->op2));
 	cris_cc_mask(dc, 0);
@@ -2081,7 +2081,7 @@ static unsigned int dec_bas_im(DisasCont
 static unsigned int dec_basc_im(DisasContext *dc)
 {
 	int32_t simm;
-	simm = ldl_code(dc->pc + 2);
+	simm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "basc 0x%x, $p%u\n", dc->pc + simm, dc->op2));
 	cris_cc_mask(dc, 0);
@@ -2259,7 +2259,7 @@ cris_decoder(DisasContext *dc)
 	int i;
 
 	/* Load a halfword onto the instruction register.  */
-	tmp = ldl_code(dc->pc);
+	tmp = ldul_code(dc->pc);
 	dc->ir = tmp & 0xffff;
 
 	/* Now decode it.  */
Index: target-i386/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-i386/exec.h,v
retrieving revision 1.37
diff -u -d -d -p -r1.37 exec.h
--- target-i386/exec.h	23 Sep 2007 15:28:04 -0000	1.37
+++ target-i386/exec.h	13 Oct 2007 22:00:10 -0000
@@ -217,6 +217,9 @@ void check_iol_DX(void);
 #if !defined(CONFIG_USER_ONLY)
 
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 
 static inline double ldfq(target_ulong ptr)
 {
@@ -244,7 +247,7 @@ static inline float ldfl(target_ulong pt
         float f;
         uint32_t i;
     } u;
-    u.i = ldl(ptr);
+    u.i = ldul(ptr);
     return u.f;
 }
 
@@ -419,12 +422,12 @@ static inline void helper_fstt(CPU86_LDo
 
 static inline CPU86_LDouble helper_fldt(target_ulong ptr)
 {
-    return *(CPU86_LDouble *)ptr;
+    return *(CPU86_LDouble *)(unsigned long)ptr;
 }
 
 static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr)
 {
-    *(CPU86_LDouble *)ptr = f;
+    *(CPU86_LDouble *)(unsigned long)ptr = f;
 }
 
 #else
Index: target-i386/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/helper.c,v
retrieving revision 1.89
diff -u -d -d -p -r1.89 helper.c
--- target-i386/helper.c	27 Sep 2007 01:52:00 -0000	1.89
+++ target-i386/helper.c	13 Oct 2007 22:00:10 -0000
@@ -122,8 +122,8 @@ static inline int load_segment(uint32_t 
     if ((index + 7) > dt->limit)
         return -1;
     ptr = dt->base + index;
-    *e1_ptr = ldl_kernel(ptr);
-    *e2_ptr = ldl_kernel(ptr + 4);
+    *e1_ptr = ldul_kernel(ptr);
+    *e2_ptr = ldul_kernel(ptr + 4);
     return 0;
 }
 
@@ -186,7 +186,7 @@ static inline void get_ss_esp_from_tss(u
         *esp_ptr = lduw_kernel(env->tr.base + index);
         *ss_ptr = lduw_kernel(env->tr.base + index + 2);
     } else {
-        *esp_ptr = ldl_kernel(env->tr.base + index);
+        *esp_ptr = ldul_kernel(env->tr.base + index);
         *ss_ptr = lduw_kernel(env->tr.base + index + 4);
     }
 }
@@ -302,15 +302,15 @@ static void switch_tss(int tss_selector,
     /* read all the registers from the new TSS */
     if (type & 8) {
         /* 32 bit */
-        new_cr3 = ldl_kernel(tss_base + 0x1c);
-        new_eip = ldl_kernel(tss_base + 0x20);
-        new_eflags = ldl_kernel(tss_base + 0x24);
+        new_cr3 = ldul_kernel(tss_base + 0x1c);
+        new_eip = ldul_kernel(tss_base + 0x20);
+        new_eflags = ldul_kernel(tss_base + 0x24);
         for(i = 0; i < 8; i++)
-            new_regs[i] = ldl_kernel(tss_base + (0x28 + i * 4));
+            new_regs[i] = ldul_kernel(tss_base + (0x28 + i * 4));
         for(i = 0; i < 6; i++)
             new_segs[i] = lduw_kernel(tss_base + (0x48 + i * 4));
         new_ldt = lduw_kernel(tss_base + 0x60);
-        new_trap = ldl_kernel(tss_base + 0x64);
+        new_trap = ldul_kernel(tss_base + 0x64);
     } else {
         /* 16 bit */
         new_cr3 = 0;
@@ -341,7 +341,7 @@ static void switch_tss(int tss_selector,
         target_ulong ptr;
         uint32_t e2;
         ptr = env->gdt.base + (env->tr.selector & ~7);
-        e2 = ldl_kernel(ptr + 4);
+        e2 = ldul_kernel(ptr + 4);
         e2 &= ~DESC_TSS_BUSY_MASK;
         stl_kernel(ptr + 4, e2);
     }
@@ -393,7 +393,7 @@ static void switch_tss(int tss_selector,
         target_ulong ptr;
         uint32_t e2;
         ptr = env->gdt.base + (tss_selector & ~7);
-        e2 = ldl_kernel(ptr + 4);
+        e2 = ldul_kernel(ptr + 4);
         e2 |= DESC_TSS_BUSY_MASK;
         stl_kernel(ptr + 4, e2);
     }
@@ -456,8 +456,8 @@ static void switch_tss(int tss_selector,
         if ((index + 7) > dt->limit)
             raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
         if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
             raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
         if (!(e2 & DESC_P_MASK))
@@ -580,7 +580,7 @@ do {\
 
 #define POPL(ssp, sp, sp_mask, val)\
 {\
-    val = (uint32_t)ldl_kernel((ssp) + (sp & (sp_mask)));\
+    val = (uint32_t)ldul_kernel((ssp) + (sp & (sp_mask)));\
     sp += 4;\
 }
 
@@ -629,8 +629,8 @@ static void do_interrupt_protected(int i
     if (intno * 8 + 7 > dt->limit)
         raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
     ptr = dt->base + intno * 8;
-    e1 = ldl_kernel(ptr);
-    e2 = ldl_kernel(ptr + 4);
+    e1 = ldul_kernel(ptr);
+    e2 = ldul_kernel(ptr + 4);
     /* check gate type */
     type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
     switch(type) {
@@ -875,9 +875,9 @@ static void do_interrupt64(int intno, in
     if (intno * 16 + 15 > dt->limit)
         raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
     ptr = dt->base + intno * 16;
-    e1 = ldl_kernel(ptr);
-    e2 = ldl_kernel(ptr + 4);
-    e3 = ldl_kernel(ptr + 8);
+    e1 = ldul_kernel(ptr);
+    e2 = ldul_kernel(ptr + 4);
+    e3 = ldul_kernel(ptr + 8);
     /* check gate type */
     type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
     switch(type) {
@@ -1147,7 +1147,7 @@ void do_interrupt_user(int intno, int is
 
     dt = &env->idt;
     ptr = dt->base + (intno * 8);
-    e2 = ldl_kernel(ptr + 4);
+    e2 = ldul_kernel(ptr + 4);
 
     dpl = (e2 >> DESC_DPL_SHIFT) & 3;
     cpl = env->hflags & HF_CPL_MASK;
@@ -1469,24 +1469,24 @@ void helper_rsm(void)
         cpu_x86_load_seg_cache(env, i,
                                lduw_phys(sm_state + offset),
                                ldq_phys(sm_state + offset + 8),
-                               ldl_phys(sm_state + offset + 4),
+                               ldul_phys(sm_state + offset + 4),
                                (lduw_phys(sm_state + offset + 2) & 0xf0ff) << 8);
     }
 
     env->gdt.base = ldq_phys(sm_state + 0x7e68);
-    env->gdt.limit = ldl_phys(sm_state + 0x7e64);
+    env->gdt.limit = ldul_phys(sm_state + 0x7e64);
 
     env->ldt.selector = lduw_phys(sm_state + 0x7e70);
     env->ldt.base = ldq_phys(sm_state + 0x7e78);
-    env->ldt.limit = ldl_phys(sm_state + 0x7e74);
+    env->ldt.limit = ldul_phys(sm_state + 0x7e74);
     env->ldt.flags = (lduw_phys(sm_state + 0x7e72) & 0xf0ff) << 8;
 
     env->idt.base = ldq_phys(sm_state + 0x7e88);
-    env->idt.limit = ldl_phys(sm_state + 0x7e84);
+    env->idt.limit = ldul_phys(sm_state + 0x7e84);
 
     env->tr.selector = lduw_phys(sm_state + 0x7e90);
     env->tr.base = ldq_phys(sm_state + 0x7e98);
-    env->tr.limit = ldl_phys(sm_state + 0x7e94);
+    env->tr.limit = ldul_phys(sm_state + 0x7e94);
     env->tr.flags = (lduw_phys(sm_state + 0x7e92) & 0xf0ff) << 8;
 
     EAX = ldq_phys(sm_state + 0x7ff8);
@@ -1500,51 +1500,51 @@ void helper_rsm(void)
     for(i = 8; i < 16; i++)
         env->regs[i] = ldq_phys(sm_state + 0x7ff8 - i * 8);
     env->eip = ldq_phys(sm_state + 0x7f78);
-    load_eflags(ldl_phys(sm_state + 0x7f70),
+    load_eflags(ldul_phys(sm_state + 0x7f70),
                 ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    env->dr[6] = ldl_phys(sm_state + 0x7f68);
-    env->dr[7] = ldl_phys(sm_state + 0x7f60);
+    env->dr[6] = ldul_phys(sm_state + 0x7f68);
+    env->dr[7] = ldul_phys(sm_state + 0x7f60);
 
-    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f48));
-    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7f50));
-    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7f58));
+    cpu_x86_update_cr4(env, ldul_phys(sm_state + 0x7f48));
+    cpu_x86_update_cr3(env, ldul_phys(sm_state + 0x7f50));
+    cpu_x86_update_cr0(env, ldul_phys(sm_state + 0x7f58));
 
-    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+    val = ldul_phys(sm_state + 0x7efc); /* revision ID */
     if (val & 0x20000) {
-        env->smbase = ldl_phys(sm_state + 0x7f00) & ~0x7fff;
+        env->smbase = ldul_phys(sm_state + 0x7f00) & ~0x7fff;
     }
 #else
-    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7ffc));
-    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7ff8));
-    load_eflags(ldl_phys(sm_state + 0x7ff4),
+    cpu_x86_update_cr0(env, ldul_phys(sm_state + 0x7ffc));
+    cpu_x86_update_cr3(env, ldul_phys(sm_state + 0x7ff8));
+    load_eflags(ldul_phys(sm_state + 0x7ff4),
                 ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    env->eip = ldl_phys(sm_state + 0x7ff0);
-    EDI = ldl_phys(sm_state + 0x7fec);
-    ESI = ldl_phys(sm_state + 0x7fe8);
-    EBP = ldl_phys(sm_state + 0x7fe4);
-    ESP = ldl_phys(sm_state + 0x7fe0);
-    EBX = ldl_phys(sm_state + 0x7fdc);
-    EDX = ldl_phys(sm_state + 0x7fd8);
-    ECX = ldl_phys(sm_state + 0x7fd4);
-    EAX = ldl_phys(sm_state + 0x7fd0);
-    env->dr[6] = ldl_phys(sm_state + 0x7fcc);
-    env->dr[7] = ldl_phys(sm_state + 0x7fc8);
+    env->eip = ldul_phys(sm_state + 0x7ff0);
+    EDI = ldul_phys(sm_state + 0x7fec);
+    ESI = ldul_phys(sm_state + 0x7fe8);
+    EBP = ldul_phys(sm_state + 0x7fe4);
+    ESP = ldul_phys(sm_state + 0x7fe0);
+    EBX = ldul_phys(sm_state + 0x7fdc);
+    EDX = ldul_phys(sm_state + 0x7fd8);
+    ECX = ldul_phys(sm_state + 0x7fd4);
+    EAX = ldul_phys(sm_state + 0x7fd0);
+    env->dr[6] = ldul_phys(sm_state + 0x7fcc);
+    env->dr[7] = ldul_phys(sm_state + 0x7fc8);
 
-    env->tr.selector = ldl_phys(sm_state + 0x7fc4) & 0xffff;
-    env->tr.base = ldl_phys(sm_state + 0x7f64);
-    env->tr.limit = ldl_phys(sm_state + 0x7f60);
-    env->tr.flags = (ldl_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
+    env->tr.selector = ldul_phys(sm_state + 0x7fc4) & 0xffff;
+    env->tr.base = ldul_phys(sm_state + 0x7f64);
+    env->tr.limit = ldul_phys(sm_state + 0x7f60);
+    env->tr.flags = (ldul_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
 
-    env->ldt.selector = ldl_phys(sm_state + 0x7fc0) & 0xffff;
-    env->ldt.base = ldl_phys(sm_state + 0x7f80);
-    env->ldt.limit = ldl_phys(sm_state + 0x7f7c);
-    env->ldt.flags = (ldl_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
+    env->ldt.selector = ldul_phys(sm_state + 0x7fc0) & 0xffff;
+    env->ldt.base = ldul_phys(sm_state + 0x7f80);
+    env->ldt.limit = ldul_phys(sm_state + 0x7f7c);
+    env->ldt.flags = (ldul_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
 
-    env->gdt.base = ldl_phys(sm_state + 0x7f74);
-    env->gdt.limit = ldl_phys(sm_state + 0x7f70);
+    env->gdt.base = ldul_phys(sm_state + 0x7f74);
+    env->gdt.limit = ldul_phys(sm_state + 0x7f70);
 
-    env->idt.base = ldl_phys(sm_state + 0x7f58);
-    env->idt.limit = ldl_phys(sm_state + 0x7f54);
+    env->idt.base = ldul_phys(sm_state + 0x7f58);
+    env->idt.limit = ldul_phys(sm_state + 0x7f54);
 
     for(i = 0; i < 6; i++) {
         if (i < 3)
@@ -1552,16 +1552,16 @@ void helper_rsm(void)
         else
             offset = 0x7f2c + (i - 3) * 12;
         cpu_x86_load_seg_cache(env, i,
-                               ldl_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
-                               ldl_phys(sm_state + offset + 8),
-                               ldl_phys(sm_state + offset + 4),
-                               (ldl_phys(sm_state + offset) & 0xf0ff) << 8);
+                               ldul_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
+                               ldul_phys(sm_state + offset + 8),
+                               ldul_phys(sm_state + offset + 4),
+                               (ldul_phys(sm_state + offset) & 0xf0ff) << 8);
     }
-    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f14));
+    cpu_x86_update_cr4(env, ldul_phys(sm_state + 0x7f14));
 
-    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+    val = ldul_phys(sm_state + 0x7efc); /* revision ID */
     if (val & 0x20000) {
-        env->smbase = ldl_phys(sm_state + 0x7ef8) & ~0x7fff;
+        env->smbase = ldul_phys(sm_state + 0x7ef8) & ~0x7fff;
     }
 #endif
     CC_OP = CC_OP_EFLAGS;
@@ -1761,7 +1761,7 @@ void helper_enter_level(int level, int d
         while (--level) {
             esp -= 4;
             ebp -= 4;
-            stl(ssp + (esp & esp_mask), ldl(ssp + (ebp & esp_mask)));
+            stl(ssp + (esp & esp_mask), ldul(ssp + (ebp & esp_mask)));
         }
         esp -= 4;
         stl(ssp + (esp & esp_mask), T1);
@@ -1836,8 +1836,8 @@ void helper_lldt_T0(void)
         if ((index + entry_limit) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
         if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         if (!(e2 & DESC_P_MASK))
@@ -1845,7 +1845,7 @@ void helper_lldt_T0(void)
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
             uint32_t e3;
-            e3 = ldl_kernel(ptr + 8);
+            e3 = ldul_kernel(ptr + 8);
             load_seg_cache_raw_dt(&env->ldt, e1, e2);
             env->ldt.base |= (target_ulong)e3 << 32;
         } else
@@ -1885,8 +1885,8 @@ void helper_ltr_T0(void)
         if ((index + entry_limit) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
         type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
         if ((e2 & DESC_S_MASK) ||
             (type != 1 && type != 9))
@@ -1896,8 +1896,8 @@ void helper_ltr_T0(void)
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
             uint32_t e3, e4;
-            e3 = ldl_kernel(ptr + 8);
-            e4 = ldl_kernel(ptr + 12);
+            e3 = ldul_kernel(ptr + 8);
+            e4 = ldul_kernel(ptr + 12);
             if ((e4 >> DESC_TYPE_SHIFT) & 0xf)
                 raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
             load_seg_cache_raw_dt(&env->tr, e1, e2);
@@ -1943,8 +1943,8 @@ void load_seg(int seg_reg, int selector)
         if ((index + 7) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
 
         if (!(e2 & DESC_S_MASK))
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
@@ -2273,7 +2273,7 @@ void helper_lcall_protected_T0_T1(int sh
                 PUSHL(ssp, sp, sp_mask, env->segs[R_SS].selector);
                 PUSHL(ssp, sp, sp_mask, ESP);
                 for(i = param_count - 1; i >= 0; i--) {
-                    val = ldl_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask));
+                    val = ldul_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask));
                     PUSHL(ssp, sp, sp_mask, val);
                 }
             } else {
@@ -3569,8 +3569,8 @@ void helper_fxrstor(target_ulong ptr, in
 
     if (env->cr[4] & CR4_OSFXSR_MASK) {
         /* XXX: finish it */
-        env->mxcsr = ldl(ptr + 0x18);
-        //ldl(ptr + 0x1c);
+        env->mxcsr = ldul(ptr + 0x18);
+        //ldul(ptr + 0x1c);
         nb_xmm_regs = 8 << data64;
         addr = ptr + 0xa0;
         for(i = 0; i < nb_xmm_regs; i++) {
@@ -3867,6 +3867,7 @@ void update_fp_status(void)
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -3879,6 +3880,21 @@ void update_fp_status(void)
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 #endif
 
 /* try to fill the TLB and return an exception if error. If retaddr is
@@ -4010,13 +4026,13 @@ void helper_vmrun(target_ulong addr)
     env->intercept_cr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_write));
     env->intercept_dr_read    = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_read));
     env->intercept_dr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_write));
-    env->intercept_exceptions = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions));
+    env->intercept_exceptions = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions));
 
     env->gdt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base));
-    env->gdt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit));
+    env->gdt.limit = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit));
 
     env->idt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base));
-    env->idt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit));
+    env->idt.limit = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit));
 
     /* clear exit_info_2 so we behave like the real hardware */
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 0);
@@ -4025,7 +4041,7 @@ void helper_vmrun(target_ulong addr)
     cpu_x86_update_cr4(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4)));
     cpu_x86_update_cr3(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3)));
     env->cr[2] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2));
-    int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
+    int_ctl = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
     if (int_ctl & V_INTR_MASKING_MASK) {
         env->cr[8] = int_ctl & V_TPR_MASK;
         if (env->eflags & IF_MASK)
@@ -4073,11 +4089,11 @@ void helper_vmrun(target_ulong addr)
     regs_to_env();
 
     /* maybe we need to inject an event */
-    event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+    event_inj = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
     if (event_inj & SVM_EVTINJ_VALID) {
         uint8_t vector = event_inj & SVM_EVTINJ_VEC_MASK;
         uint16_t valid_err = event_inj & SVM_EVTINJ_VALID_ERR;
-        uint32_t event_inj_err = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
+        uint32_t event_inj_err = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
         stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
 
         if (loglevel & CPU_LOG_TB_IN_ASM)
@@ -4309,7 +4325,7 @@ void vmexit(uint64_t exit_code, uint64_t
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3), env->cr[3]);
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4), env->cr[4]);
 
-    if ((int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl))) & V_INTR_MASKING_MASK) {
+    if ((int_ctl = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl))) & V_INTR_MASKING_MASK) {
         int_ctl &= ~V_TPR_MASK;
         int_ctl |= env->cr[8] & V_TPR_MASK;
         stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl), int_ctl);
@@ -4330,10 +4346,10 @@ void vmexit(uint64_t exit_code, uint64_t
     env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
 
     env->gdt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base));
-    env->gdt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit));
+    env->gdt.limit = ldul_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit));
 
     env->idt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base));
-    env->idt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit));
+    env->idt.limit = ldul_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit));
 
     cpu_x86_update_cr0(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0)) | CR0_PE_MASK);
     cpu_x86_update_cr4(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4)));
Index: target-i386/helper2.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/helper2.c,v
retrieving revision 1.52
diff -u -d -d -p -r1.52 helper2.c
--- target-i386/helper2.c	23 Sep 2007 15:28:04 -0000	1.52
+++ target-i386/helper2.c	13 Oct 2007 22:00:10 -0000
@@ -771,7 +771,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
         /* page directory entry */
         pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) &
             env->a20_mask;
-        pde = ldl_phys(pde_addr);
+        pde = ldul_phys(pde_addr);
         if (!(pde & PG_PRESENT_MASK)) {
             error_code = 0;
             goto do_fault;
@@ -809,7 +809,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
             /* page directory entry */
             pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
                 env->a20_mask;
-            pte = ldl_phys(pte_addr);
+            pte = ldul_phys(pte_addr);
             if (!(pte & PG_PRESENT_MASK)) {
                 error_code = 0;
                 goto do_fault;
@@ -909,13 +909,13 @@ target_phys_addr_t cpu_get_phys_page_deb
 
             pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
                 env->a20_mask;
-            pml4e = ldl_phys(pml4e_addr);
+            pml4e = ldul_phys(pml4e_addr);
             if (!(pml4e & PG_PRESENT_MASK))
                 return -1;
 
             pdpe_addr = ((pml4e & ~0xfff) + (((addr >> 30) & 0x1ff) << 3)) &
                 env->a20_mask;
-            pdpe = ldl_phys(pdpe_addr);
+            pdpe = ldul_phys(pdpe_addr);
             if (!(pdpe & PG_PRESENT_MASK))
                 return -1;
         } else
@@ -923,14 +923,14 @@ target_phys_addr_t cpu_get_phys_page_deb
         {
             pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) &
                 env->a20_mask;
-            pdpe = ldl_phys(pdpe_addr);
+            pdpe = ldul_phys(pdpe_addr);
             if (!(pdpe & PG_PRESENT_MASK))
                 return -1;
         }
 
         pde_addr = ((pdpe & ~0xfff) + (((addr >> 21) & 0x1ff) << 3)) &
             env->a20_mask;
-        pde = ldl_phys(pde_addr);
+        pde = ldul_phys(pde_addr);
         if (!(pde & PG_PRESENT_MASK)) {
             return -1;
         }
@@ -943,7 +943,7 @@ target_phys_addr_t cpu_get_phys_page_deb
             pte_addr = ((pde & ~0xfff) + (((addr >> 12) & 0x1ff) << 3)) &
                 env->a20_mask;
             page_size = 4096;
-            pte = ldl_phys(pte_addr);
+            pte = ldul_phys(pte_addr);
         }
     } else {
         if (!(env->cr[0] & CR0_PG_MASK)) {
@@ -952,7 +952,7 @@ target_phys_addr_t cpu_get_phys_page_deb
         } else {
             /* page directory entry */
             pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask;
-            pde = ldl_phys(pde_addr);
+            pde = ldul_phys(pde_addr);
             if (!(pde & PG_PRESENT_MASK))
                 return -1;
             if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
@@ -961,7 +961,7 @@ target_phys_addr_t cpu_get_phys_page_deb
             } else {
                 /* page directory entry */
                 pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask;
-                pte = ldl_phys(pte_addr);
+                pte = ldul_phys(pte_addr);
                 if (!(pte & PG_PRESENT_MASK))
                     return -1;
                 page_size = 4096;
Index: target-i386/op.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/op.c,v
retrieving revision 1.51
diff -u -d -d -p -r1.51 op.c
--- target-i386/op.c	23 Sep 2007 15:28:04 -0000	1.51
+++ target-i386/op.c	13 Oct 2007 22:00:10 -0000
@@ -716,8 +716,8 @@ void OPPROTO op_boundw(void)
 void OPPROTO op_boundl(void)
 {
     int low, high, v;
-    low = ldl(A0);
-    high = ldl(A0 + 4);
+    low = ldul(A0);
+    high = ldul(A0 + 4);
     v = T0;
     if (v < low || v > high) {
         raise_exception(EXCP05_BOUND);
@@ -747,8 +747,6 @@ void OPPROTO op_exit_tb(void)
 
 /* multiple size ops */
 
-#define ldul ldl
-
 #define SHIFT 0
 #include "ops_template.h"
 #undef SHIFT
@@ -1688,7 +1686,7 @@ CCTable cc_table[CC_OP_NB] = {
 void OPPROTO op_flds_FT0_A0(void)
 {
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = ldl(A0);
+    FP_CONVERT.i32 = ldul(A0);
     FT0 = FP_CONVERT.f;
 #else
     FT0 = ldfl(A0);
@@ -1715,7 +1713,7 @@ void helper_fild_FT0_A0(void)
 
 void helper_fildl_FT0_A0(void)
 {
-    FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
+    FT0 = (CPU86_LDouble)((int32_t)ldul(A0));
 }
 
 void helper_fildll_FT0_A0(void)
@@ -1753,10 +1751,10 @@ void OPPROTO op_fild_FT0_A0(void)
 void OPPROTO op_fildl_FT0_A0(void)
 {
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = (int32_t) ldl(A0);
+    FP_CONVERT.i32 = (int32_t) ldul(A0);
     FT0 = (CPU86_LDouble)FP_CONVERT.i32;
 #else
-    FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
+    FT0 = (CPU86_LDouble)((int32_t)ldul(A0));
 #endif
 }
 
@@ -1778,7 +1776,7 @@ void OPPROTO op_flds_ST0_A0(void)
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = ldl(A0);
+    FP_CONVERT.i32 = ldul(A0);
     env->fpregs[new_fpstt].d = FP_CONVERT.f;
 #else
     env->fpregs[new_fpstt].d = ldfl(A0);
@@ -1822,7 +1820,7 @@ void helper_fildl_ST0_A0(void)
 {
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
-    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0));
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldul(A0));
     env->fpstt = new_fpstt;
     env->fptags[new_fpstt] = 0; /* validate stack entry */
 }
@@ -1872,10 +1870,10 @@ void OPPROTO op_fildl_ST0_A0(void)
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = (int32_t) ldl(A0);
+    FP_CONVERT.i32 = (int32_t) ldul(A0);
     env->fpregs[new_fpstt].d = (CPU86_LDouble)FP_CONVERT.i32;
 #else
-    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0));
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldul(A0));
 #endif
     env->fpstt = new_fpstt;
     env->fptags[new_fpstt] = 0; /* validate stack entry */
Index: target-i386/ops_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-i386/ops_mem.h,v
retrieving revision 1.7
diff -u -d -d -p -r1.7 ops_mem.h
--- target-i386/ops_mem.h	28 Nov 2005 21:02:17 -0000	1.7
+++ target-i386/ops_mem.h	13 Oct 2007 22:00:10 -0000
@@ -20,7 +20,7 @@ void OPPROTO glue(glue(op_ldsw, MEMSUFFI
 
 void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T0_A0)(void)
 {
-    T0 = (uint32_t)glue(ldl, MEMSUFFIX)(A0);
+    T0 = (uint32_t)glue(ldul, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_ldub, MEMSUFFIX), _T1_A0)(void)
@@ -45,7 +45,7 @@ void OPPROTO glue(glue(op_ldsw, MEMSUFFI
 
 void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T1_A0)(void)
 {
-    T1 = (uint32_t)glue(ldl, MEMSUFFIX)(A0);
+    T1 = (uint32_t)glue(ldul, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_stb, MEMSUFFIX), _T0_A0)(void)
@@ -122,12 +122,12 @@ void OPPROTO glue(glue(op_sto, MEMSUFFIX
 #ifdef TARGET_X86_64
 void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T0_A0)(void)
 {
-    T0 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
+    T0 = glue(ldsl, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T1_A0)(void)
 {
-    T1 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
+    T1 = glue(ldsl, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_ldq, MEMSUFFIX), _T0_A0)(void)
Index: target-i386/svm.h
===================================================================
RCS file: /sources/qemu/qemu/target-i386/svm.h,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 svm.h
--- target-i386/svm.h	23 Sep 2007 15:30:28 -0000	1.1
+++ target-i386/svm.h	13 Oct 2007 22:00:10 -0000
@@ -340,13 +340,13 @@ static inline int svm_check_intercept(un
                     R_##seg_index, \
                     lduw_phys(addr + offsetof(struct vmcb, save.seg.selector)),\
                     ldq_phys(addr + offsetof(struct vmcb, save.seg.base)),\
-                    ldl_phys(addr + offsetof(struct vmcb, save.seg.limit)),\
-                    vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg.attrib)), ldq_phys(addr + offsetof(struct vmcb, save.seg.base)), ldl_phys(addr + offsetof(struct vmcb, save.seg.limit))))
+                    ldul_phys(addr + offsetof(struct vmcb, save.seg.limit)),\
+                    vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg.attrib)), ldq_phys(addr + offsetof(struct vmcb, save.seg.base)), ldul_phys(addr + offsetof(struct vmcb, save.seg.limit))))
 
 #define SVM_LOAD_SEG2(addr, seg_qemu, seg_vmcb) \
     env->seg_qemu.selector  = lduw_phys(addr + offsetof(struct vmcb, save.seg_vmcb.selector)); \
     env->seg_qemu.base      = ldq_phys(addr + offsetof(struct vmcb, save.seg_vmcb.base)); \
-    env->seg_qemu.limit     = ldl_phys(addr + offsetof(struct vmcb, save.seg_vmcb.limit)); \
+    env->seg_qemu.limit     = ldul_phys(addr + offsetof(struct vmcb, save.seg_vmcb.limit)); \
     env->seg_qemu.flags     = vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg_vmcb.attrib)), env->seg_qemu.base, env->seg_qemu.limit)
 
 #define SVM_SAVE_SEG(addr, seg_qemu, seg_vmcb) \
Index: target-i386/translate-copy.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/translate-copy.c,v
retrieving revision 1.9
diff -u -d -d -p -r1.9 translate-copy.c
--- target-i386/translate-copy.c	17 Sep 2007 08:09:52 -0000	1.9
+++ target-i386/translate-copy.c	13 Oct 2007 22:00:11 -0000
@@ -207,7 +207,7 @@ static inline void gen_lea_modrm(DisasCo
         case 0:
             if (base == 5) {
                 base = -1;
-                disp = ldl_code(s->pc);
+                disp = ldul_code(s->pc);
                 s->pc += 4;
             } else {
                 disp = 0;
@@ -218,7 +218,7 @@ static inline void gen_lea_modrm(DisasCo
             break;
         default:
         case 2:
-            disp = ldl_code(s->pc);
+            disp = ldul_code(s->pc);
             s->pc += 4;
             break;
         }
@@ -266,7 +266,7 @@ static inline uint32_t insn_get(DisasCon
         break;
     default:
     case OT_LONG:
-        ret = ldl_code(s->pc);
+        ret = ldul_code(s->pc);
         s->pc += 4;
         break;
     }
Index: target-i386/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/translate.c,v
retrieving revision 1.72
diff -u -d -d -p -r1.72 translate.c
--- target-i386/translate.c	27 Sep 2007 01:52:00 -0000	1.72
+++ target-i386/translate.c	13 Oct 2007 22:00:11 -0000
@@ -1462,7 +1462,7 @@ static void gen_lea_modrm(DisasContext *
         case 0:
             if ((base & 7) == 5) {
                 base = -1;
-                disp = (int32_t)ldl_code(s->pc);
+                disp = (int32_t)ldul_code(s->pc);
                 s->pc += 4;
                 if (CODE64(s) && !havesib) {
                     disp += s->pc + s->rip_offset;
@@ -1476,7 +1476,7 @@ static void gen_lea_modrm(DisasContext *
             break;
         default:
         case 2:
-            disp = ldl_code(s->pc);
+            disp = ldul_code(s->pc);
             s->pc += 4;
             break;
         }
@@ -1736,7 +1736,7 @@ static inline uint32_t insn_get(DisasCon
         break;
     default:
     case OT_LONG:
-        ret = ldl_code(s->pc);
+        ret = ldul_code(s->pc);
         s->pc += 4;
         break;
     }
Index: target-m68k/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-m68k/exec.h,v
retrieving revision 1.4
diff -u -d -d -p -r1.4 exec.h
--- target-m68k/exec.h	16 Sep 2007 21:08:03 -0000	1.4
+++ target-m68k/exec.h	13 Oct 2007 22:00:11 -0000
@@ -42,6 +42,9 @@ int cpu_m68k_handle_mmu_fault (CPUState 
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 void cpu_m68k_flush_flags(CPUM68KState *env, int cc_op);
Index: target-m68k/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-m68k/op_helper.c,v
retrieving revision 1.6
diff -u -d -d -p -r1.6 op_helper.c
--- target-m68k/op_helper.c	16 Sep 2007 21:08:03 -0000	1.6
+++ target-m68k/op_helper.c	13 Oct 2007 22:00:11 -0000
@@ -33,6 +33,21 @@ extern int semihosting_enabled;
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -44,6 +59,7 @@ extern int semihosting_enabled;
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
@@ -83,8 +99,8 @@ static void do_rte(void)
     uint32_t fmt;
 
     sp = env->aregs[7];
-    fmt = ldl_kernel(sp);
-    env->pc = ldl_kernel(sp + 4);
+    fmt = ldul_kernel(sp);
+    env->pc = ldul_kernel(sp + 4);
     sp |= (fmt >> 28) & 3;
     env->sr = fmt & 0xffff;
     m68k_switch_sp(env);
@@ -112,7 +128,7 @@ void do_interrupt(int is_hw)
                     && (env->sr & SR_S) != 0
                     && (env->pc & 3) == 0
                     && lduw_code(env->pc - 4) == 0x4e71
-                    && ldl_code(env->pc) == 0x4e7bf000) {
+                    && ldul_code(env->pc) == 0x4e7bf000) {
                 env->pc += 4;
                 do_m68k_semihosting(env, env->dregs[0]);
                 return;
@@ -153,7 +169,7 @@ void do_interrupt(int is_hw)
     stl_kernel(sp, fmt);
     env->aregs[7] = sp;
     /* Jump to vector.  */
-    env->pc = ldl_kernel(env->vbr + vector);
+    env->pc = ldul_kernel(env->vbr + vector);
 }
 
 #endif
Index: target-m68k/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-m68k/op_mem.h,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 op_mem.h
--- target-m68k/op_mem.h	23 May 2007 19:58:11 -0000	1.1
+++ target-m68k/op_mem.h	13 Oct 2007 22:00:11 -0000
@@ -11,7 +11,7 @@ MEM_LD_OP(8u32,ub)
 MEM_LD_OP(8s32,sb)
 MEM_LD_OP(16u32,uw)
 MEM_LD_OP(16s32,sw)
-MEM_LD_OP(32,l)
+MEM_LD_OP(32,ul)
 
 #undef MEM_LD_OP
 
Index: target-mips/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-mips/exec.h,v
retrieving revision 1.38
diff -u -d -d -p -r1.38 exec.h
--- target-mips/exec.h	9 Oct 2007 03:39:58 -0000	1.38
+++ target-mips/exec.h	13 Oct 2007 22:00:11 -0000
@@ -54,6 +54,9 @@ register target_ulong T2 asm(AREG3);
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 #if defined(TARGET_MIPSN32) || defined(TARGET_MIPS64)
Index: target-mips/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-mips/op_helper.c,v
retrieving revision 1.65
diff -u -d -d -p -r1.65 op_helper.c
--- target-mips/op_helper.c	9 Oct 2007 03:39:58 -0000	1.65
+++ target-mips/op_helper.c	13 Oct 2007 22:00:11 -0000
@@ -544,6 +544,21 @@ static void do_unaligned_access (target_
 #define MMUSUFFIX _mmu
 #define ALIGNED_ONLY
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -555,6 +570,7 @@ static void do_unaligned_access (target_
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 static void do_unaligned_access (target_ulong addr, int is_write, int is_user, void *retaddr)
 {
Index: target-mips/op_mem.c
===================================================================
RCS file: /sources/qemu/qemu/target-mips/op_mem.c,v
retrieving revision 1.14
diff -u -d -d -p -r1.14 op_mem.c
--- target-mips/op_mem.c	9 Oct 2007 03:12:08 -0000	1.14
+++ target-mips/op_mem.c	13 Oct 2007 22:00:11 -0000
@@ -57,13 +57,13 @@ void glue(op_sh, MEMSUFFIX) (void)
 
 void glue(op_lw, MEMSUFFIX) (void)
 {
-    T0 = glue(ldl, MEMSUFFIX)(T0);
+    T0 = glue(ldul, MEMSUFFIX)(T0);
     RETURN();
 }
 
 void glue(op_lwu, MEMSUFFIX) (void)
 {
-    T0 = (uint32_t)glue(ldl, MEMSUFFIX)(T0);
+    T0 = (uint32_t)glue(ldul, MEMSUFFIX)(T0);
     RETURN();
 }
 
@@ -167,7 +167,7 @@ void glue(op_swr, MEMSUFFIX) (void)
 void glue(op_ll, MEMSUFFIX) (void)
 {
     T1 = T0;
-    T0 = glue(ldl, MEMSUFFIX)(T0);
+    T0 = glue(ldul, MEMSUFFIX)(T0);
     env->CP0_LLAddr = T1;
     RETURN();
 }
@@ -383,7 +383,7 @@ void glue(op_scd, MEMSUFFIX) (void)
 
 void glue(op_lwc1, MEMSUFFIX) (void)
 {
-    WT0 = glue(ldl, MEMSUFFIX)(T0);
+    WT0 = glue(ldul, MEMSUFFIX)(T0);
     RETURN();
 }
 void glue(op_swc1, MEMSUFFIX) (void)
Index: target-mips/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-mips/translate.c,v
retrieving revision 1.106
diff -u -d -d -p -r1.106 translate.c
--- target-mips/translate.c	9 Oct 2007 03:39:58 -0000	1.106
+++ target-mips/translate.c	13 Oct 2007 22:00:11 -0000
@@ -6544,7 +6544,7 @@ gen_intermediate_code_internal (CPUState
             gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK;
             gen_opc_instr_start[lj] = 1;
         }
-        ctx.opcode = ldl_code(ctx.pc);
+        ctx.opcode = ldul_code(ctx.pc);
         decode_opc(env, &ctx);
         ctx.pc += 4;
 
Index: target-ppc/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/exec.h,v
retrieving revision 1.28
diff -u -d -d -p -r1.28 exec.h
--- target-ppc/exec.h	7 Oct 2007 18:19:25 -0000	1.28
+++ target-ppc/exec.h	13 Oct 2007 22:00:11 -0000
@@ -91,7 +91,12 @@ static always_inline target_ulong rotl64
 #endif
 
 #if !defined(CONFIG_USER_ONLY)
+
+#include "softmmu_exec.h"
+#define REVERSE_ENDIAN
 #include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
+
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 void do_raise_exception_err (uint32_t exception, int error_code);
Index: target-ppc/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/helper.c,v
retrieving revision 1.75
diff -u -d -d -p -r1.75 helper.c
--- target-ppc/helper.c	8 Oct 2007 02:58:07 -0000	1.75
+++ target-ppc/helper.c	13 Oct 2007 22:00:12 -0000
@@ -514,8 +514,8 @@ static always_inline int _find_pte (mmu_
         } else
 #endif
         {
-            pte0 = ldl_phys(base + (i * 8));
-            pte1 =  ldl_phys(base + (i * 8) + 4);
+            pte0 = ldul_phys(base + (i * 8));
+            pte1 =  ldul_phys(base + (i * 8) + 4);
             r = pte32_check(ctx, pte0, pte1, h, rw);
 #if defined (DEBUG_MMU)
             if (loglevel != 0) {
@@ -623,7 +623,7 @@ static int slb_lookup (CPUPPCState *env,
     slb_nr = env->slb_nr;
     for (n = 0; n < slb_nr; n++) {
         tmp64 = ldq_phys(sr_base);
-        tmp = ldl_phys(sr_base + 8);
+        tmp = ldul_phys(sr_base + 8);
 #if defined(DEBUG_SLB)
         if (loglevel != 0) {
             fprintf(logfile, "%s: seg %d " PADDRX " %016" PRIx64 " %08"
@@ -671,7 +671,7 @@ target_ulong ppc_load_slb (CPUPPCState *
     sr_base = env->spr[SPR_ASR];
     sr_base += 12 * slb_nr;
     tmp64 = ldq_phys(sr_base);
-    tmp = ldl_phys(sr_base + 8);
+    tmp = ldul_phys(sr_base + 8);
     if (tmp64 & 0x0000000008000000ULL) {
         /* SLB entry is valid */
         /* Copy SLB bits 62:88 to Rt 37:63 (VSID 23:49) */
@@ -877,10 +877,10 @@ static int get_segment (CPUState *env, m
                         sdr, mask + 0x80);
                 for (curaddr = sdr; curaddr < (sdr + mask + 0x80);
                      curaddr += 16) {
-                    a0 = ldl_phys(curaddr);
-                    a1 = ldl_phys(curaddr + 4);
-                    a2 = ldl_phys(curaddr + 8);
-                    a3 = ldl_phys(curaddr + 12);
+                    a0 = ldul_phys(curaddr);
+                    a1 = ldul_phys(curaddr + 4);
+                    a2 = ldul_phys(curaddr + 8);
+                    a3 = ldul_phys(curaddr + 12);
                     if (a0 != 0 || a1 != 0 || a2 != 0 || a3 != 0) {
                         fprintf(logfile,
                                 PADDRX ": %08x %08x %08x %08x\n",
@@ -2219,7 +2219,7 @@ static always_inline void powerpc_excp (
 #endif
         /* XXX: this is false */
         /* Get rS/rD and rA from faulting opcode */
-        env->spr[SPR_DSISR] |= (ldl_code((env->nip - 4)) & 0x03FF0000) >> 16;
+        env->spr[SPR_DSISR] |= (ldul_code((env->nip - 4)) & 0x03FF0000) >> 16;
         goto store_current;
     case POWERPC_EXCP_PROGRAM:   /* Program exception                        */
         switch (env->error_code & ~0xF) {
Index: target-ppc/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_helper.c,v
retrieving revision 1.49
diff -u -d -d -p -r1.49 op_helper.c
--- target-ppc/op_helper.c	7 Oct 2007 17:13:43 -0000	1.49
+++ target-ppc/op_helper.c	13 Oct 2007 22:00:12 -0000
@@ -2291,6 +2291,21 @@ DO_SPE_OP1(fsctuf);
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -2302,6 +2317,7 @@ DO_SPE_OP1(fsctuf);
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-ppc/op_helper.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_helper.h,v
retrieving revision 1.21
diff -u -d -d -p -r1.21 op_helper.h
--- target-ppc/op_helper.h	7 Oct 2007 17:13:44 -0000	1.21
+++ target-ppc/op_helper.h	13 Oct 2007 22:00:12 -0000
@@ -37,19 +37,6 @@ void glue(do_POWER2_lfq_le, MEMSUFFIX) (
 void glue(do_POWER2_stfq, MEMSUFFIX) (void);
 void glue(do_POWER2_stfq_le, MEMSUFFIX) (void);
 
-#if defined(TARGET_PPC64)
-void glue(do_lsw_64, MEMSUFFIX) (int dst);
-void glue(do_lsw_le_64, MEMSUFFIX) (int dst);
-void glue(do_stsw_64, MEMSUFFIX) (int src);
-void glue(do_stsw_le_64, MEMSUFFIX) (int src);
-void glue(do_lmw_64, MEMSUFFIX) (int dst);
-void glue(do_lmw_le_64, MEMSUFFIX) (int dst);
-void glue(do_stmw_64, MEMSUFFIX) (int src);
-void glue(do_stmw_le_64, MEMSUFFIX) (int src);
-void glue(do_icbi_64, MEMSUFFIX) (void);
-void glue(do_dcbz_64, MEMSUFFIX) (void);
-#endif
-
 #else
 
 void do_print_mem_EA (target_ulong EA);
Index: target-ppc/op_helper_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_helper_mem.h,v
retrieving revision 1.14
diff -u -d -d -p -r1.14 op_helper_mem.h
--- target-ppc/op_helper_mem.h	7 Oct 2007 17:13:44 -0000	1.14
+++ target-ppc/op_helper_mem.h	13 Oct 2007 22:00:12 -0000
@@ -19,85 +19,33 @@
  */
 
 /* Multiple word / string load and store */
-static always_inline target_ulong glue(ld32r, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF000000UL) >> 24) | ((tmp & 0x00FF0000UL) >> 8) |
-        ((tmp & 0x0000FF00UL) << 8) | ((tmp & 0x000000FFUL) << 24);
-}
-
-static always_inline void glue(st32r, MEMSUFFIX) (target_ulong EA,
-                                                  target_ulong data)
-{
-    uint32_t tmp =
-        ((data & 0xFF000000UL) >> 24) | ((data & 0x00FF0000UL) >> 8) |
-        ((data & 0x0000FF00UL) << 8) | ((data & 0x000000FFUL) << 24);
-    glue(stl, MEMSUFFIX)(EA, tmp);
-}
-
 void glue(do_lmw, MEMSUFFIX) (int dst)
 {
     for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lmw_64, MEMSUFFIX) (int dst)
-{
-    for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst] = glue(ldul, MEMSUFFIX)(T0);
     }
 }
-#endif
 
 void glue(do_stmw, MEMSUFFIX) (int src)
 {
     for (; src < 32; src++, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint32_t)T0, env->gpr[src]);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stmw_64, MEMSUFFIX) (int src)
-{
-    for (; src < 32; src++, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint64_t)T0, env->gpr[src]);
+        glue(stl, MEMSUFFIX)(T0, env->gpr[src]);
     }
 }
-#endif
 
 void glue(do_lmw_le, MEMSUFFIX) (int dst)
 {
     for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lmw_le_64, MEMSUFFIX) (int dst)
-{
-    for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst] = glue(ldulr, MEMSUFFIX)(T0);
     }
 }
-#endif
 
 void glue(do_stmw_le, MEMSUFFIX) (int src)
 {
     for (; src < 32; src++, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint32_t)T0, env->gpr[src]);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stmw_le_64, MEMSUFFIX) (int src)
-{
-    for (; src < 32; src++, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint64_t)T0, env->gpr[src]);
+        glue(stlr, MEMSUFFIX)(T0, env->gpr[src]);
     }
 }
-#endif
 
 void glue(do_lsw, MEMSUFFIX) (int dst)
 {
@@ -105,71 +53,33 @@ void glue(do_lsw, MEMSUFFIX) (int dst)
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-        if (unlikely(dst == 32))
-            dst = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        tmp = 0;
-        for (sh = 24; T1 > 0; T1--, T0++, sh -= 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint32_t)T0) << sh;
-        }
-        env->gpr[dst] = tmp;
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lsw_64, MEMSUFFIX) (int dst)
-{
-    uint32_t tmp;
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst++] = glue(ldul, MEMSUFFIX)(T0);
         if (unlikely(dst == 32))
             dst = 0;
     }
     if (unlikely(T1 != 0)) {
         tmp = 0;
         for (sh = 24; T1 > 0; T1--, T0++, sh -= 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint64_t)T0) << sh;
+            tmp |= glue(ldub, MEMSUFFIX)(T0) << sh;
         }
         env->gpr[dst] = tmp;
     }
 }
-#endif
 
 void glue(do_stsw, MEMSUFFIX) (int src)
 {
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint32_t)T0, env->gpr[src++]);
-        if (unlikely(src == 32))
-            src = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        for (sh = 24; T1 > 0; T1--, T0++, sh -= 8)
-            glue(stb, MEMSUFFIX)((uint32_t)T0, (env->gpr[src] >> sh) & 0xFF);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stsw_64, MEMSUFFIX) (int src)
-{
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint64_t)T0, env->gpr[src++]);
+        glue(stl, MEMSUFFIX)(T0, env->gpr[src++]);
         if (unlikely(src == 32))
             src = 0;
     }
     if (unlikely(T1 != 0)) {
         for (sh = 24; T1 > 0; T1--, T0++, sh -= 8)
-            glue(stb, MEMSUFFIX)((uint64_t)T0, (env->gpr[src] >> sh) & 0xFF);
+            glue(stb, MEMSUFFIX)(T0, (env->gpr[src] >> sh) & 0xFF);
     }
 }
-#endif
 
 void glue(do_lsw_le, MEMSUFFIX) (int dst)
 {
@@ -177,71 +87,33 @@ void glue(do_lsw_le, MEMSUFFIX) (int dst
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
-        if (unlikely(dst == 32))
-            dst = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        tmp = 0;
-        for (sh = 0; T1 > 0; T1--, T0++, sh += 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint32_t)T0) << sh;
-        }
-        env->gpr[dst] = tmp;
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lsw_le_64, MEMSUFFIX) (int dst)
-{
-    uint32_t tmp;
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst++] = glue(ldulr, MEMSUFFIX)(T0);
         if (unlikely(dst == 32))
             dst = 0;
     }
     if (unlikely(T1 != 0)) {
         tmp = 0;
         for (sh = 0; T1 > 0; T1--, T0++, sh += 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint64_t)T0) << sh;
+            tmp |= glue(ldub, MEMSUFFIX)(T0) << sh;
         }
         env->gpr[dst] = tmp;
     }
 }
-#endif
 
 void glue(do_stsw_le, MEMSUFFIX) (int src)
 {
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint32_t)T0, env->gpr[src++]);
-        if (unlikely(src == 32))
-            src = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        for (sh = 0; T1 > 0; T1--, T0++, sh += 8)
-            glue(stb, MEMSUFFIX)((uint32_t)T0, (env->gpr[src] >> sh) & 0xFF);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stsw_le_64, MEMSUFFIX) (int src)
-{
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint64_t)T0, env->gpr[src++]);
+        glue(stlr, MEMSUFFIX)(T0, env->gpr[src++]);
         if (unlikely(src == 32))
             src = 0;
     }
     if (unlikely(T1 != 0)) {
         for (sh = 0; T1 > 0; T1--, T0++, sh += 8)
-            glue(stb, MEMSUFFIX)((uint64_t)T0, (env->gpr[src] >> sh) & 0xFF);
+            glue(stb, MEMSUFFIX)(T0, (env->gpr[src] >> sh) & 0xFF);
     }
 }
-#endif
 
 /* Instruction cache invalidation helper */
 void glue(do_icbi, MEMSUFFIX) (void)
@@ -252,27 +124,11 @@ void glue(do_icbi, MEMSUFFIX) (void)
      * (not a fetch) by the MMU. To be sure it will be so,
      * do the load "by hand".
      */
-    tmp = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-    T0 &= ~(env->icache_line_size - 1);
-    tb_invalidate_page_range((uint32_t)T0,
-                             (uint32_t)(T0 + env->icache_line_size));
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_icbi_64, MEMSUFFIX) (void)
-{
-    uint64_t tmp;
-    /* Invalidate one cache line :
-     * PowerPC specification says this is to be treated like a load
-     * (not a fetch) by the MMU. To be sure it will be so,
-     * do the load "by hand".
-     */
-    tmp = glue(ldq, MEMSUFFIX)((uint64_t)T0);
+    tmp = glue(ldul, MEMSUFFIX)(T0);
     T0 &= ~(env->icache_line_size - 1);
-    tb_invalidate_page_range((uint64_t)T0,
-                             (uint64_t)(T0 + env->icache_line_size));
+    /* We assume it would not wrap around 2^32 on 32 bits targets */
+    tb_invalidate_page_range(T0, T0 + env->icache_line_size);
 }
-#endif
 
 void glue(do_dcbz, MEMSUFFIX) (void)
 {
@@ -281,90 +137,43 @@ void glue(do_dcbz, MEMSUFFIX) (void)
     /* XXX: should be 970 specific (?) */
     if (((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1)
         dcache_line_size = 32;
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x00), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x04), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x08), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x0C), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x10), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x14), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x18), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x1C), 0);
-    if (dcache_line_size >= 64) {
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x20UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x24UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x28UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x2CUL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x30UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x34UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x38UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x3CUL), 0);
-        if (dcache_line_size >= 128) {
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x40UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x44UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x48UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x4CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x50UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x54UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x58UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x5CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x60UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x64UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x68UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x6CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x70UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x74UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x78UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x7CUL), 0);
-        }
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_dcbz_64, MEMSUFFIX) (void)
-{
-    int dcache_line_size = env->dcache_line_size;
-
-    /* XXX: should be 970 specific (?) */
-    if (((env->spr[SPR_970_HID5] >> 6) & 0x3) == 0x2)
-        dcache_line_size = 32;
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x00), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x04), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x08), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x0C), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x10), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x14), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x18), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x1C), 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x00, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x04, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x08, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x0C, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x10, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x14, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x18, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x1C, 0);
     if (dcache_line_size >= 64) {
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x20UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x24UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x28UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x2CUL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x30UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x34UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x38UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x3CUL), 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x20UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x24UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x28UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x2CUL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x30UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x34UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x38UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x3CUL, 0);
         if (dcache_line_size >= 128) {
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x40UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x44UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x48UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x4CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x50UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x54UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x58UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x5CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x60UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x64UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x68UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x6CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x70UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x74UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x78UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x7CUL), 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x40UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x44UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x48UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x4CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x50UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x54UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x58UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x5CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x60UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x64UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x68UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x6CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x70UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x74UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x78UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x7CUL, 0);
         }
     }
 }
-#endif
 
 /* PowerPC 601 specific instructions (POWER bridge) */
 // XXX: to be tested
@@ -400,26 +209,6 @@ void glue(do_POWER2_lfq, MEMSUFFIX) (voi
     FT1 = glue(ldfq, MEMSUFFIX)((uint32_t)(T0 + 4));
 }
 
-static always_inline double glue(ldfqr, MEMSUFFIX) (target_ulong EA)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = glue(ldfq, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-
-    return u.d;
-}
-
 void glue(do_POWER2_lfq_le, MEMSUFFIX) (void)
 {
     FT0 = glue(ldfqr, MEMSUFFIX)((uint32_t)(T0 + 4));
@@ -432,25 +221,6 @@ void glue(do_POWER2_stfq, MEMSUFFIX) (vo
     glue(stfq, MEMSUFFIX)((uint32_t)(T0 + 4), FT1);
 }
 
-static always_inline void glue(stfqr, MEMSUFFIX) (target_ulong EA, double d)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = d;
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-    glue(stfq, MEMSUFFIX)(EA, u.d);
-}
-
 void glue(do_POWER2_stfq_le, MEMSUFFIX) (void)
 {
     glue(stfqr, MEMSUFFIX)((uint32_t)(T0 + 4), FT0);
Index: target-ppc/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_mem.h,v
retrieving revision 1.22
diff -u -d -d -p -r1.22 op_mem.h
--- target-ppc/op_mem.h	7 Oct 2007 18:19:25 -0000	1.22
+++ target-ppc/op_mem.h	13 Oct 2007 22:00:12 -0000
@@ -18,85 +18,6 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-static always_inline uint16_t glue(ld16r, MEMSUFFIX) (target_ulong EA)
-{
-    uint16_t tmp = glue(lduw, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF00) >> 8) | ((tmp & 0x00FF) << 8);
-}
-
-static always_inline int32_t glue(ld16rs, MEMSUFFIX) (target_ulong EA)
-{
-    int16_t tmp = glue(lduw, MEMSUFFIX)(EA);
-    return (int16_t)((tmp & 0xFF00) >> 8) | ((tmp & 0x00FF) << 8);
-}
-
-static always_inline uint32_t glue(ld32r, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF000000) >> 24) | ((tmp & 0x00FF0000) >> 8) |
-        ((tmp & 0x0000FF00) << 8) | ((tmp & 0x000000FF) << 24);
-}
-
-#if defined(TARGET_PPC64) || defined(TARGET_PPCEMB)
-static always_inline uint64_t glue(ld64r, MEMSUFFIX) (target_ulong EA)
-{
-    uint64_t tmp = glue(ldq, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF00000000000000ULL) >> 56) |
-        ((tmp & 0x00FF000000000000ULL) >> 40) |
-        ((tmp & 0x0000FF0000000000ULL) >> 24) |
-        ((tmp & 0x000000FF00000000ULL) >> 8) |
-        ((tmp & 0x00000000FF000000ULL) << 8) |
-        ((tmp & 0x0000000000FF0000ULL) << 24) |
-        ((tmp & 0x000000000000FF00ULL) << 40) |
-        ((tmp & 0x00000000000000FFULL) << 54);
-}
-#endif
-
-#if defined(TARGET_PPC64)
-static always_inline int64_t glue(ldsl, MEMSUFFIX) (target_ulong EA)
-{
-    return (int32_t)glue(ldl, MEMSUFFIX)(EA);
-}
-
-static always_inline int64_t glue(ld32rs, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return (int32_t)((tmp & 0xFF000000) >> 24) | ((tmp & 0x00FF0000) >> 8) |
-        ((tmp & 0x0000FF00) << 8) | ((tmp & 0x000000FF) << 24);
-}
-#endif
-
-static always_inline void glue(st16r, MEMSUFFIX) (target_ulong EA,
-                                                  uint16_t data)
-{
-    uint16_t tmp = ((data & 0xFF00) >> 8) | ((data & 0x00FF) << 8);
-    glue(stw, MEMSUFFIX)(EA, tmp);
-}
-
-static always_inline void glue(st32r, MEMSUFFIX) (target_ulong EA,
-                                                  uint32_t data)
-{
-    uint32_t tmp = ((data & 0xFF000000) >> 24) | ((data & 0x00FF0000) >> 8) |
-        ((data & 0x0000FF00) << 8) | ((data & 0x000000FF) << 24);
-    glue(stl, MEMSUFFIX)(EA, tmp);
-}
-
-#if defined(TARGET_PPC64) || defined(TARGET_PPCEMB)
-static always_inline void glue(st64r, MEMSUFFIX) (target_ulong EA,
-                                                  uint64_t data)
-{
-    uint64_t tmp = ((data & 0xFF00000000000000ULL) >> 56) |
-        ((data & 0x00FF000000000000ULL) >> 40) |
-        ((data & 0x0000FF0000000000ULL) >> 24) |
-        ((data & 0x000000FF00000000ULL) >> 8) |
-        ((data & 0x00000000FF000000ULL) << 8) |
-        ((data & 0x0000000000FF0000ULL) << 24) |
-        ((data & 0x000000000000FF00ULL) << 40) |
-        ((data & 0x00000000000000FFULL) << 56);
-    glue(stq, MEMSUFFIX)(EA, tmp);
-}
-#endif
-
 /***                             Integer load                              ***/
 #define PPC_LD_OP(name, op)                                                   \
 void OPPROTO glue(glue(op_l, name), MEMSUFFIX) (void)                         \
@@ -130,10 +51,11 @@ void OPPROTO glue(glue(glue(op_st, name)
 }
 #endif
 
+/* Native-endian fixed-point memory loads                                    */
 PPC_LD_OP(bz, ldub);
 PPC_LD_OP(ha, ldsw);
 PPC_LD_OP(hz, lduw);
-PPC_LD_OP(wz, ldl);
+PPC_LD_OP(wz, ldul);
 #if defined(TARGET_PPC64)
 PPC_LD_OP(d, ldq);
 PPC_LD_OP(wa, ldsl);
@@ -142,23 +64,24 @@ PPC_LD_OP_64(wa, ldsl);
 PPC_LD_OP_64(bz, ldub);
 PPC_LD_OP_64(ha, ldsw);
 PPC_LD_OP_64(hz, lduw);
-PPC_LD_OP_64(wz, ldl);
+PPC_LD_OP_64(wz, ldul);
 #endif
 
-PPC_LD_OP(ha_le, ld16rs);
-PPC_LD_OP(hz_le, ld16r);
-PPC_LD_OP(wz_le, ld32r);
+/* Reverse-endian fixed-point memory loads                                   */
+PPC_LD_OP(ha_le, ldswr);
+PPC_LD_OP(hz_le, lduwr);
+PPC_LD_OP(wz_le, ldulr);
 #if defined(TARGET_PPC64)
-PPC_LD_OP(d_le, ld64r);
-PPC_LD_OP(wa_le, ld32rs);
-PPC_LD_OP_64(d_le, ld64r);
-PPC_LD_OP_64(wa_le, ld32rs);
-PPC_LD_OP_64(ha_le, ld16rs);
-PPC_LD_OP_64(hz_le, ld16r);
-PPC_LD_OP_64(wz_le, ld32r);
+PPC_LD_OP(d_le, ldqr);
+PPC_LD_OP(wa_le, ldslr);
+PPC_LD_OP_64(d_le, ldqr);
+PPC_LD_OP_64(wa_le, ldslr);
+PPC_LD_OP_64(ha_le, ldswr);
+PPC_LD_OP_64(hz_le, lduwr);
+PPC_LD_OP_64(wz_le, ldulr);
 #endif
 
-/***                              Integer store                            ***/
+/* Native-endian fixed-point memory stores                                   */
 PPC_ST_OP(b, stb);
 PPC_ST_OP(h, stw);
 PPC_ST_OP(w, stl);
@@ -170,120 +93,110 @@ PPC_ST_OP_64(h, stw);
 PPC_ST_OP_64(w, stl);
 #endif
 
-PPC_ST_OP(h_le, st16r);
-PPC_ST_OP(w_le, st32r);
+/* Reverse-endian fixed-point memory stores                                  */
+PPC_ST_OP(h_le, stwr);
+PPC_ST_OP(w_le, stlr);
 #if defined(TARGET_PPC64)
-PPC_ST_OP(d_le, st64r);
-PPC_ST_OP_64(d_le, st64r);
-PPC_ST_OP_64(h_le, st16r);
-PPC_ST_OP_64(w_le, st32r);
+PPC_ST_OP(d_le, stqr);
+PPC_ST_OP_64(d_le, stqr);
+PPC_ST_OP_64(h_le, stwr);
+PPC_ST_OP_64(w_le, stlr);
 #endif
 
-/***                Integer load and store with byte reverse               ***/
-PPC_LD_OP(hbr, ld16r);
-PPC_LD_OP(wbr, ld32r);
-PPC_ST_OP(hbr, st16r);
-PPC_ST_OP(wbr, st32r);
+/* Native-endian fixed-point loads and stores with byte-reverse              */
+PPC_LD_OP(hbr, lduwr);
+PPC_LD_OP(wbr, ldulr);
+PPC_ST_OP(hbr, stwr);
+PPC_ST_OP(wbr, stlr);
 #if defined(TARGET_PPC64)
-PPC_LD_OP_64(hbr, ld16r);
-PPC_LD_OP_64(wbr, ld32r);
-PPC_ST_OP_64(hbr, st16r);
-PPC_ST_OP_64(wbr, st32r);
+PPC_LD_OP_64(hbr, lduwr);
+PPC_LD_OP_64(wbr, ldulr);
+PPC_ST_OP_64(hbr, stwr);
+PPC_ST_OP_64(wbr, stlr);
 #endif
 
+/* Reverse-endian fixed-point loads and stores with byte-reverse             */
 PPC_LD_OP(hbr_le, lduw);
-PPC_LD_OP(wbr_le, ldl);
+PPC_LD_OP(wbr_le, ldul);
 PPC_ST_OP(hbr_le, stw);
 PPC_ST_OP(wbr_le, stl);
 #if defined(TARGET_PPC64)
 PPC_LD_OP_64(hbr_le, lduw);
-PPC_LD_OP_64(wbr_le, ldl);
+PPC_LD_OP_64(wbr_le, ldul);
 PPC_ST_OP_64(hbr_le, stw);
 PPC_ST_OP_64(wbr_le, stl);
 #endif
 
-/***                    Integer load and store multiple                    ***/
+/* Native-endian fixed-point loads and stores multiple                       */
 void OPPROTO glue(op_lmw, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_lmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_lmw_64, MEMSUFFIX) (void)
-{
-    glue(do_lmw_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
-void OPPROTO glue(op_lmw_le, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw, MEMSUFFIX) (void)
 {
-    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_stmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_lmw_le_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_lmw_64, MEMSUFFIX) (void)
 {
-    glue(do_lmw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_lmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
-#endif
 
-void OPPROTO glue(op_stmw, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw_64, MEMSUFFIX) (void)
 {
     glue(do_stmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
+#endif
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_stmw_64, MEMSUFFIX) (void)
+/* Reverse-endian fixed-point loads and stores multiple                      */
+void OPPROTO glue(op_lmw_le, MEMSUFFIX) (void)
 {
-    glue(do_stmw_64, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
-#endif
 
 void OPPROTO glue(op_stmw_le, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_stmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_stmw_le_64, MEMSUFFIX) (void)
-{
-    glue(do_stmw_le_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
-/***                    Integer load and store strings                     ***/
-void OPPROTO glue(op_lswi, MEMSUFFIX) (void)
+void OPPROTO glue(op_lmw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw, MEMSUFFIX)(PARAM1);
+    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_lswi_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw_64, MEMSUFFIX)(PARAM1);
+    glue(do_stmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
 
-void OPPROTO glue(op_lswi_le, MEMSUFFIX) (void)
+/* Native-endian loads and stores string                                     */
+void OPPROTO glue(op_lswi, MEMSUFFIX) (void)
 {
-    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0; /* 32-bit variant: truncate EA, as op_lswi_le does */
+    glue(do_lsw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_lswi_le_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_lswi_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_lsw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
@@ -303,6 +216,7 @@ void OPPROTO glue(op_lswx, MEMSUFFIX) (v
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
+            T0 = (uint32_t)T0;
             glue(do_lsw, MEMSUFFIX)(PARAM1);
         }
     }
@@ -320,13 +234,44 @@ void OPPROTO glue(op_lswx_64, MEMSUFFIX)
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
-            glue(do_lsw_64, MEMSUFFIX)(PARAM1);
+            glue(do_lsw, MEMSUFFIX)(PARAM1);
         }
     }
     RETURN();
 }
 #endif
 
+void OPPROTO glue(op_stsw, MEMSUFFIX) (void)
+{
+    T0 = (uint32_t)T0;
+    glue(do_stsw, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+
+#if defined(TARGET_PPC64)
+void OPPROTO glue(op_stsw_64, MEMSUFFIX) (void)
+{
+    glue(do_stsw, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+#endif
+
+/* Reverse-endian loads and stores string                                    */
+void OPPROTO glue(op_lswi_le, MEMSUFFIX) (void)
+{
+    T0 = (uint32_t)T0;
+    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+
+#if defined(TARGET_PPC64)
+void OPPROTO glue(op_lswi_le_64, MEMSUFFIX) (void)
+{
+    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+#endif
+
 void OPPROTO glue(op_lswx_le, MEMSUFFIX) (void)
 {
     /* Note: T1 comes from xer_bc then no cast is needed */
@@ -337,6 +282,7 @@ void OPPROTO glue(op_lswx_le, MEMSUFFIX)
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
+            T0 = (uint32_t)T0;
             glue(do_lsw_le, MEMSUFFIX)(PARAM1);
         }
     }
@@ -354,29 +300,16 @@ void OPPROTO glue(op_lswx_le_64, MEMSUFF
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
-            glue(do_lsw_le_64, MEMSUFFIX)(PARAM1);
+            glue(do_lsw_le, MEMSUFFIX)(PARAM1);
         }
     }
     RETURN();
 }
 #endif
 
-void OPPROTO glue(op_stsw, MEMSUFFIX) (void)
-{
-    glue(do_stsw, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_stsw_64, MEMSUFFIX) (void)
-{
-    glue(do_stsw_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
 void OPPROTO glue(op_stsw_le, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_stsw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
@@ -384,7 +317,7 @@ void OPPROTO glue(op_stsw_le, MEMSUFFIX)
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_stsw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_stsw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_stsw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
@@ -432,38 +365,9 @@ PPC_STF_OP_64(fs, stfs);
 PPC_STF_OP_64(fiwx, stfiwx);
 #endif
 
-static always_inline void glue(stfqr, MEMSUFFIX) (target_ulong EA, double d)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = d;
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-    glue(stfq, MEMSUFFIX)(EA, u.d);
-}
-
 static always_inline void glue(stfsr, MEMSUFFIX) (target_ulong EA, double d)
 {
-    union {
-        float f;
-        uint32_t u;
-    } u;
-
-    u.f = float64_to_float32(d, &env->fp_status);
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-    glue(stfl, MEMSUFFIX)(EA, u.f);
+    glue(stflr, MEMSUFFIX)(EA, float64_to_float32(d, &env->fp_status));
 }
 
 static always_inline void glue(stfiwxr, MEMSUFFIX) (target_ulong EA, double d)
@@ -475,11 +379,7 @@ static always_inline void glue(stfiwxr, 
 
     /* Store the low order 32 bits without any conversion */
     u.d = d;
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-    glue(stl, MEMSUFFIX)(EA, u.u);
+    glue(stlr, MEMSUFFIX)(EA, u.u);
 }
 
 PPC_STF_OP(fd_le, stfqr);
@@ -520,40 +420,9 @@ PPC_LDF_OP_64(fd, ldfq);
 PPC_LDF_OP_64(fs, ldfs);
 #endif
 
-static always_inline double glue(ldfqr, MEMSUFFIX) (target_ulong EA)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = glue(ldfq, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-
-    return u.d;
-}
-
 static always_inline double glue(ldfsr, MEMSUFFIX) (target_ulong EA)
 {
-    union {
-        float f;
-        uint32_t u;
-    } u;
-
-    u.f = glue(ldfl, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-
-    return float32_to_float64(u.f, &env->fp_status);
+    return float32_to_float64(glue(ldflr, MEMSUFFIX)(EA), &env->fp_status);
 }
 
 PPC_LDF_OP(fd_le, ldfqr);
@@ -569,7 +438,7 @@ void OPPROTO glue(op_lwarx, MEMSUFFIX) (
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ldl, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldul, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -581,7 +450,7 @@ void OPPROTO glue(op_lwarx_64, MEMSUFFIX
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldul, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -615,7 +484,7 @@ void OPPROTO glue(op_lwarx_le, MEMSUFFIX
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldulr, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -627,7 +496,7 @@ void OPPROTO glue(op_lwarx_le_64, MEMSUF
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldulr, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -638,7 +507,7 @@ void OPPROTO glue(op_ldarx_le, MEMSUFFIX
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld64r, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldqr, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -649,7 +518,7 @@ void OPPROTO glue(op_ldarx_le_64, MEMSUF
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld64r, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldqr, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -731,7 +600,7 @@ void OPPROTO glue(op_stwcx_le, MEMSUFFIX
         if (unlikely(env->reserve != (uint32_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st32r, MEMSUFFIX)((uint32_t)T0, T1);
+            glue(stlr, MEMSUFFIX)((uint32_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -748,7 +617,7 @@ void OPPROTO glue(op_stwcx_le_64, MEMSUF
         if (unlikely(env->reserve != (uint64_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st32r, MEMSUFFIX)((uint64_t)T0, T1);
+            glue(stlr, MEMSUFFIX)((uint64_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -764,7 +633,7 @@ void OPPROTO glue(op_stdcx_le, MEMSUFFIX
         if (unlikely(env->reserve != (uint32_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st64r, MEMSUFFIX)((uint32_t)T0, T1);
+            glue(stqr, MEMSUFFIX)((uint32_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -780,7 +649,7 @@ void OPPROTO glue(op_stdcx_le_64, MEMSUF
         if (unlikely(env->reserve != (uint64_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st64r, MEMSUFFIX)((uint64_t)T0, T1);
+            glue(stqr, MEMSUFFIX)((uint64_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -862,6 +731,7 @@ void OPPROTO glue(op_dcbz_l128, MEMSUFFI
 
 void OPPROTO glue(op_dcbz, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_dcbz, MEMSUFFIX)();
     RETURN();
 }
@@ -940,7 +810,7 @@ void OPPROTO glue(op_dcbz_l128_64, MEMSU
 
 void OPPROTO glue(op_dcbz_64, MEMSUFFIX) (void)
 {
-    glue(do_dcbz_64, MEMSUFFIX)();
+    glue(do_dcbz, MEMSUFFIX)();
     RETURN();
 }
 #endif
@@ -948,6 +818,7 @@ void OPPROTO glue(op_dcbz_64, MEMSUFFIX)
 /* Instruction cache block invalidate */
 void OPPROTO glue(op_icbi, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_icbi, MEMSUFFIX)();
     RETURN();
 }
@@ -955,7 +826,7 @@ void OPPROTO glue(op_icbi, MEMSUFFIX) (v
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_icbi_64, MEMSUFFIX) (void)
 {
-    glue(do_icbi_64, MEMSUFFIX)();
+    glue(do_icbi, MEMSUFFIX)();
     RETURN();
 }
 #endif
@@ -963,14 +834,14 @@ void OPPROTO glue(op_icbi_64, MEMSUFFIX)
 /* External access */
 void OPPROTO glue(op_eciwx, MEMSUFFIX) (void)
 {
-    T1 = glue(ldl, MEMSUFFIX)((uint32_t)T0);
+    T1 = glue(ldul, MEMSUFFIX)((uint32_t)T0);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_eciwx_64, MEMSUFFIX) (void)
 {
-    T1 = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+    T1 = glue(ldul, MEMSUFFIX)((uint64_t)T0);
     RETURN();
 }
 #endif
@@ -991,28 +862,28 @@ void OPPROTO glue(op_ecowx_64, MEMSUFFIX
 
 void OPPROTO glue(op_eciwx_le, MEMSUFFIX) (void)
 {
-    T1 = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
+    T1 = glue(ldulr, MEMSUFFIX)((uint32_t)T0);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_eciwx_le_64, MEMSUFFIX) (void)
 {
-    T1 = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+    T1 = glue(ldulr, MEMSUFFIX)((uint64_t)T0);
     RETURN();
 }
 #endif
 
 void OPPROTO glue(op_ecowx_le, MEMSUFFIX) (void)
 {
-    glue(st32r, MEMSUFFIX)((uint32_t)T0, T1);
+    glue(stlr, MEMSUFFIX)((uint32_t)T0, T1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_ecowx_le_64, MEMSUFFIX) (void)
 {
-    glue(st32r, MEMSUFFIX)((uint64_t)T0, T1);
+    glue(stlr, MEMSUFFIX)((uint64_t)T0, T1);
     RETURN();
 }
 #endif
@@ -1070,8 +941,8 @@ void OPPROTO glue(op_vr_lvx, MEMSUFFIX) 
 
 void OPPROTO glue(op_vr_lvx_le, MEMSUFFIX) (void)
 {
-    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint32_t)T0);
-    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint32_t)T0 + 8);
+    AVR0.u64[VR_DWORD1] = glue(ldqr, MEMSUFFIX)((uint32_t)T0);
+    AVR0.u64[VR_DWORD0] = glue(ldqr, MEMSUFFIX)((uint32_t)T0 + 8);
 }
 
 void OPPROTO glue(op_vr_stvx, MEMSUFFIX) (void)
@@ -1082,8 +953,8 @@ void OPPROTO glue(op_vr_stvx, MEMSUFFIX)
 
 void OPPROTO glue(op_vr_stvx_le, MEMSUFFIX) (void)
 {
-    glue(stq, MEMSUFFIX)((uint32_t)T0, AVR0.u64[VR_DWORD1]);
-    glue(stq, MEMSUFFIX)((uint32_t)T0 + 8, AVR0.u64[VR_DWORD0]);
+    glue(stqr, MEMSUFFIX)((uint32_t)T0, AVR0.u64[VR_DWORD1]);
+    glue(stqr, MEMSUFFIX)((uint32_t)T0 + 8, AVR0.u64[VR_DWORD0]);
 }
 
 #if defined(TARGET_PPC64)
@@ -1095,8 +966,8 @@ void OPPROTO glue(op_vr_lvx_64, MEMSUFFI
 
 void OPPROTO glue(op_vr_lvx_le_64, MEMSUFFIX) (void)
 {
-    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint64_t)T0);
-    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint64_t)T0 + 8);
+    AVR0.u64[VR_DWORD1] = glue(ldqr, MEMSUFFIX)((uint64_t)T0);
+    AVR0.u64[VR_DWORD0] = glue(ldqr, MEMSUFFIX)((uint64_t)T0 + 8);
 }
 
 void OPPROTO glue(op_vr_stvx_64, MEMSUFFIX) (void)
@@ -1107,8 +978,8 @@ void OPPROTO glue(op_vr_stvx_64, MEMSUFF
 
 void OPPROTO glue(op_vr_stvx_le_64, MEMSUFFIX) (void)
 {
-    glue(stq, MEMSUFFIX)((uint64_t)T0, AVR0.u64[VR_DWORD1]);
-    glue(stq, MEMSUFFIX)((uint64_t)T0 + 8, AVR0.u64[VR_DWORD0]);
+    glue(stqr, MEMSUFFIX)((uint64_t)T0, AVR0.u64[VR_DWORD1]);
+    glue(stqr, MEMSUFFIX)((uint64_t)T0 + 8, AVR0.u64[VR_DWORD0]);
 }
 #endif
 #undef VR_DWORD0
@@ -1163,14 +1034,14 @@ _PPC_SPE_ST_OP(name, op)
 #if !defined(TARGET_PPC64)
 PPC_SPE_LD_OP(dd, ldq);
 PPC_SPE_ST_OP(dd, stq);
-PPC_SPE_LD_OP(dd_le, ld64r);
-PPC_SPE_ST_OP(dd_le, st64r);
+PPC_SPE_LD_OP(dd_le, ldqr);
+PPC_SPE_ST_OP(dd_le, stqr);
 #endif
 static always_inline uint64_t glue(spe_ldw, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ldl, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ldl, MEMSUFFIX)(EA + 4);
+    ret = (uint64_t)glue(ldul, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(ldul, MEMSUFFIX)(EA + 4);
     return ret;
 }
 PPC_SPE_LD_OP(dw, spe_ldw);
@@ -1184,16 +1055,16 @@ PPC_SPE_ST_OP(dw, spe_stdw);
 static always_inline uint64_t glue(spe_ldw_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld32r, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ld32r, MEMSUFFIX)(EA + 4);
+    ret = (uint64_t)glue(ldulr, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(ldulr, MEMSUFFIX)(EA + 4);
     return ret;
 }
 PPC_SPE_LD_OP(dw_le, spe_ldw_le);
 static always_inline void glue(spe_stdw_le, MEMSUFFIX) (target_ulong EA,
                                                         uint64_t data)
 {
-    glue(st32r, MEMSUFFIX)(EA, data >> 32);
-    glue(st32r, MEMSUFFIX)(EA + 4, data);
+    glue(stlr, MEMSUFFIX)(EA, data >> 32);
+    glue(stlr, MEMSUFFIX)(EA + 4, data);
 }
 PPC_SPE_ST_OP(dw_le, spe_stdw_le);
 static always_inline uint64_t glue(spe_ldh, MEMSUFFIX) (target_ulong EA)
@@ -1218,20 +1089,20 @@ PPC_SPE_ST_OP(dh, spe_stdh);
 static always_inline uint64_t glue(spe_ldh_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 48;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2) << 32;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 4) << 16;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 6);
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 48;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2) << 32;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 4) << 16;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 6);
     return ret;
 }
 PPC_SPE_LD_OP(dh_le, spe_ldh_le);
 static always_inline void glue(spe_stdh_le, MEMSUFFIX) (target_ulong EA,
                                                         uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 48);
-    glue(st16r, MEMSUFFIX)(EA + 2, data >> 32);
-    glue(st16r, MEMSUFFIX)(EA + 4, data >> 16);
-    glue(st16r, MEMSUFFIX)(EA + 6, data);
+    glue(stwr, MEMSUFFIX)(EA, data >> 48);
+    glue(stwr, MEMSUFFIX)(EA + 2, data >> 32);
+    glue(stwr, MEMSUFFIX)(EA + 4, data >> 16);
+    glue(stwr, MEMSUFFIX)(EA + 6, data);
 }
 PPC_SPE_ST_OP(dh_le, spe_stdh_le);
 static always_inline uint64_t glue(spe_lwhe, MEMSUFFIX) (target_ulong EA)
@@ -1252,16 +1123,16 @@ PPC_SPE_ST_OP(whe, spe_stwhe);
 static always_inline uint64_t glue(spe_lwhe_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 48;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2) << 16;
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 48;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2) << 16;
     return ret;
 }
 PPC_SPE_LD_OP(whe_le, spe_lwhe_le);
 static always_inline void glue(spe_stwhe_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 48);
-    glue(st16r, MEMSUFFIX)(EA + 2, data >> 16);
+    glue(stwr, MEMSUFFIX)(EA, data >> 48);
+    glue(stwr, MEMSUFFIX)(EA + 2, data >> 16);
 }
 PPC_SPE_ST_OP(whe_le, spe_stwhe_le);
 static always_inline uint64_t glue(spe_lwhou, MEMSUFFIX) (target_ulong EA)
@@ -1290,24 +1161,24 @@ PPC_SPE_ST_OP(who, spe_stwho);
 static always_inline uint64_t glue(spe_lwhou_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2);
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2);
     return ret;
 }
 PPC_SPE_LD_OP(whou_le, spe_lwhou_le);
 static always_inline uint64_t glue(spe_lwhos_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = ((uint64_t)((int32_t)glue(ld16rs, MEMSUFFIX)(EA))) << 32;
-    ret |= (uint64_t)((int32_t)glue(ld16rs, MEMSUFFIX)(EA + 2));
+    ret = ((uint64_t)((int32_t)glue(ldswr, MEMSUFFIX)(EA))) << 32;
+    ret |= (uint64_t)((int32_t)glue(ldswr, MEMSUFFIX)(EA + 2));
     return ret;
 }
 PPC_SPE_LD_OP(whos_le, spe_lwhos_le);
 static always_inline void glue(spe_stwho_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 32);
-    glue(st16r, MEMSUFFIX)(EA + 2, data);
+    glue(stwr, MEMSUFFIX)(EA, data >> 32);
+    glue(stwr, MEMSUFFIX)(EA + 2, data);
 }
 PPC_SPE_ST_OP(who_le, spe_stwho_le);
 #if !defined(TARGET_PPC64)
@@ -1320,7 +1191,7 @@ PPC_SPE_ST_OP(wwo, spe_stwwo);
 static always_inline void glue(spe_stwwo_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st32r, MEMSUFFIX)(EA, data);
+    glue(stlr, MEMSUFFIX)(EA, data);
 }
 PPC_SPE_ST_OP(wwo_le, spe_stwwo_le);
 #endif
@@ -1334,14 +1205,14 @@ PPC_SPE_LD_OP(h, spe_lh);
 static always_inline uint64_t glue(spe_lh_le, MEMSUFFIX) (target_ulong EA)
 {
     uint16_t tmp;
-    tmp = glue(ld16r, MEMSUFFIX)(EA);
+    tmp = glue(lduwr, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 48) | ((uint64_t)tmp << 16);
 }
 PPC_SPE_LD_OP(h_le, spe_lh_le);
 static always_inline uint64_t glue(spe_lwwsplat, MEMSUFFIX) (target_ulong EA)
 {
     uint32_t tmp;
-    tmp = glue(ldl, MEMSUFFIX)(EA);
+    tmp = glue(ldul, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 32) | (uint64_t)tmp;
 }
 PPC_SPE_LD_OP(wwsplat, spe_lwwsplat);
@@ -1349,7 +1220,7 @@ static always_inline
 uint64_t glue(spe_lwwsplat_le, MEMSUFFIX) (target_ulong EA)
 {
     uint32_t tmp;
-    tmp = glue(ld32r, MEMSUFFIX)(EA);
+    tmp = glue(ldulr, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 32) | (uint64_t)tmp;
 }
 PPC_SPE_LD_OP(wwsplat_le, spe_lwwsplat_le);
@@ -1369,9 +1240,9 @@ uint64_t glue(spe_lwhsplat_le, MEMSUFFIX
 {
     uint64_t ret;
     uint16_t tmp;
-    tmp = glue(ld16r, MEMSUFFIX)(EA);
+    tmp = glue(lduwr, MEMSUFFIX)(EA);
     ret = ((uint64_t)tmp << 48) | ((uint64_t)tmp << 32);
-    tmp = glue(ld16r, MEMSUFFIX)(EA + 2);
+    tmp = glue(lduwr, MEMSUFFIX)(EA + 2);
     ret |= ((uint64_t)tmp << 16) | (uint64_t)tmp;
     return ret;
 }
Index: target-ppc/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/translate.c,v
retrieving revision 1.92
diff -u -d -d -p -r1.92 translate.c
--- target-ppc/translate.c	7 Oct 2007 23:10:08 -0000	1.92
+++ target-ppc/translate.c	13 Oct 2007 22:00:12 -0000
@@ -6763,7 +6763,7 @@ static always_inline int gen_intermediat
                     ctx.nip, 1 - msr_pr, msr_ir);
         }
 #endif
-        ctx.opcode = ldl_code(ctx.nip);
+        ctx.opcode = ldul_code(ctx.nip);
         if (msr_le) {
             ctx.opcode = ((ctx.opcode & 0xFF000000) >> 24) |
                 ((ctx.opcode & 0x00FF0000) >> 8) |
Index: target-sh4/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-sh4/exec.h,v
retrieving revision 1.5
diff -u -d -d -p -r1.5 exec.h
--- target-sh4/exec.h	16 Sep 2007 21:08:05 -0000	1.5
+++ target-sh4/exec.h	13 Oct 2007 22:00:12 -0000
@@ -48,6 +48,9 @@ static inline int cpu_halted(CPUState *e
 
 #ifndef CONFIG_USER_ONLY
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 #define RETURN() __asm__ __volatile__("")
Index: target-sh4/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-sh4/op_helper.c,v
retrieving revision 1.4
diff -u -d -d -p -r1.4 op_helper.c
--- target-sh4/op_helper.c	16 Sep 2007 21:08:05 -0000	1.4
+++ target-sh4/op_helper.c	13 Oct 2007 22:00:12 -0000
@@ -30,6 +30,21 @@ void do_raise_exception(void)
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -41,6 +56,7 @@ void do_raise_exception(void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 void tlb_fill(target_ulong addr, int is_write, int is_user, void *retaddr)
 {
Index: target-sh4/op_mem.c
===================================================================
RCS file: /sources/qemu/qemu/target-sh4/op_mem.c,v
retrieving revision 1.3
diff -u -d -d -p -r1.3 op_mem.c
--- target-sh4/op_mem.c	16 Sep 2007 21:08:05 -0000	1.3
+++ target-sh4/op_mem.c	13 Oct 2007 22:00:12 -0000
@@ -48,7 +48,7 @@ void glue(op_stw_T0_T1, MEMSUFFIX) (void
 }
 
 void glue(op_ldl_T0_T0, MEMSUFFIX) (void) {
-    T0 = glue(ldl, MEMSUFFIX) (T0);
+    T0 = glue(ldul, MEMSUFFIX) (T0);
     RETURN();
 }
 
Index: target-sparc/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/exec.h,v
retrieving revision 1.21
diff -u -d -d -p -r1.21 exec.h
--- target-sparc/exec.h	30 Sep 2007 19:38:11 -0000	1.21
+++ target-sparc/exec.h	13 Oct 2007 22:00:12 -0000
@@ -100,6 +100,9 @@ void do_rdpsr();
 /* XXX: move that to a generic header */
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 static inline void env_to_regs(void)
Index: target-sparc/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/helper.c,v
retrieving revision 1.27
diff -u -d -d -p -r1.27 helper.c
--- target-sparc/helper.c	24 Sep 2007 19:44:09 -0000	1.27
+++ target-sparc/helper.c	13 Oct 2007 22:00:12 -0000
@@ -129,7 +129,7 @@ int get_physical_address (CPUState *env,
     /* SPARC reference MMU table walk: Context table->L1->L2->PTE */
     /* Context base + context number */
     pde_ptr = (env->mmuregs[1] << 4) + (env->mmuregs[2] << 2);
-    pde = ldl_phys(pde_ptr);
+    pde = ldul_phys(pde_ptr);
 
     /* Ctx pde */
     switch (pde & PTE_ENTRYTYPE_MASK) {
@@ -141,7 +141,7 @@ int get_physical_address (CPUState *env,
         return 4 << 2;
     case 1: /* L0 PDE */
         pde_ptr = ((address >> 22) & ~3) + ((pde & ~3) << 4);
-        pde = ldl_phys(pde_ptr);
+        pde = ldul_phys(pde_ptr);
 
         switch (pde & PTE_ENTRYTYPE_MASK) {
         default:
@@ -151,7 +151,7 @@ int get_physical_address (CPUState *env,
             return (1 << 8) | (4 << 2);
         case 1: /* L1 PDE */
             pde_ptr = ((address & 0xfc0000) >> 16) + ((pde & ~3) << 4);
-            pde = ldl_phys(pde_ptr);
+            pde = ldul_phys(pde_ptr);
 
             switch (pde & PTE_ENTRYTYPE_MASK) {
             default:
@@ -161,7 +161,7 @@ int get_physical_address (CPUState *env,
                 return (2 << 8) | (4 << 2);
             case 1: /* L2 PDE */
                 pde_ptr = ((address & 0x3f000) >> 10) + ((pde & ~3) << 4);
-                pde = ldl_phys(pde_ptr);
+                pde = ldul_phys(pde_ptr);
 
                 switch (pde & PTE_ENTRYTYPE_MASK) {
                 default:
@@ -265,7 +265,7 @@ target_ulong mmu_probe(CPUState *env, ta
     /* Context base + context number */
     pde_ptr = (target_phys_addr_t)(env->mmuregs[1] << 4) +
         (env->mmuregs[2] << 2);
-    pde = ldl_phys(pde_ptr);
+    pde = ldul_phys(pde_ptr);
 
     switch (pde & PTE_ENTRYTYPE_MASK) {
     default:
@@ -277,7 +277,7 @@ target_ulong mmu_probe(CPUState *env, ta
         if (mmulev == 3)
             return pde;
         pde_ptr = ((address >> 22) & ~3) + ((pde & ~3) << 4);
-        pde = ldl_phys(pde_ptr);
+        pde = ldul_phys(pde_ptr);
 
         switch (pde & PTE_ENTRYTYPE_MASK) {
         default:
@@ -290,7 +290,7 @@ target_ulong mmu_probe(CPUState *env, ta
             if (mmulev == 2)
                 return pde;
             pde_ptr = ((address & 0xfc0000) >> 16) + ((pde & ~3) << 4);
-            pde = ldl_phys(pde_ptr);
+            pde = ldul_phys(pde_ptr);
 
             switch (pde & PTE_ENTRYTYPE_MASK) {
             default:
@@ -303,7 +303,7 @@ target_ulong mmu_probe(CPUState *env, ta
                 if (mmulev == 1)
                     return pde;
                 pde_ptr = ((address & 0x3f000) >> 10) + ((pde & ~3) << 4);
-                pde = ldl_phys(pde_ptr);
+                pde = ldul_phys(pde_ptr);
 
                 switch (pde & PTE_ENTRYTYPE_MASK) {
                 default:
@@ -330,7 +330,7 @@ void dump_mmu(CPUState *env)
 
     printf("MMU dump:\n");
     pde_ptr = (env->mmuregs[1] << 4) + (env->mmuregs[2] << 2);
-    pde = ldl_phys(pde_ptr);
+    pde = ldul_phys(pde_ptr);
     printf("Root ptr: " TARGET_FMT_plx ", ctx: %d\n",
            (target_phys_addr_t)env->mmuregs[1] << 4, env->mmuregs[2]);
     for (n = 0, va = 0; n < 256; n++, va += 16 * 1024 * 1024) {
Index: target-sparc/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/op_helper.c,v
retrieving revision 1.41
diff -u -d -d -p -r1.41 op_helper.c
--- target-sparc/op_helper.c	1 Oct 2007 17:07:58 -0000	1.41
+++ target-sparc/op_helper.c	13 Oct 2007 22:00:12 -0000
@@ -184,11 +184,11 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_code(T0 & ~3);
+            ret = ldul_code(T0 & ~3);
             break;
         case 8:
-            ret = ldl_code(T0 & ~3);
-            T0 = ldl_code((T0 + 4) & ~3);
+            ret = ldul_code(T0 & ~3);
+            T0 = ldul_code((T0 + 4) & ~3);
             break;
         }
         break;
@@ -202,11 +202,11 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_user(T0 & ~3);
+            ret = ldul_user(T0 & ~3);
             break;
         case 8:
-            ret = ldl_user(T0 & ~3);
-            T0 = ldl_user((T0 + 4) & ~3);
+            ret = ldul_user(T0 & ~3);
+            T0 = ldul_user((T0 + 4) & ~3);
             break;
         }
         break;
@@ -220,11 +220,11 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_kernel(T0 & ~3);
+            ret = ldul_kernel(T0 & ~3);
             break;
         case 8:
-            ret = ldl_kernel(T0 & ~3);
-            T0 = ldl_kernel((T0 + 4) & ~3);
+            ret = ldul_kernel(T0 & ~3);
+            T0 = ldul_kernel((T0 + 4) & ~3);
             break;
         }
         break;
@@ -243,11 +243,11 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_phys(T0 & ~3);
+            ret = ldul_phys(T0 & ~3);
             break;
         case 8:
-            ret = ldl_phys(T0 & ~3);
-            T0 = ldl_phys((T0 + 4) & ~3);
+            ret = ldul_phys(T0 & ~3);
+            T0 = ldul_phys((T0 + 4) & ~3);
             break;
         }
         break;
@@ -264,13 +264,13 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_phys((target_phys_addr_t)(T0 & ~3)
-                           | ((target_phys_addr_t)(asi & 0xf) << 32));
+            ret = ldul_phys((target_phys_addr_t)(T0 & ~3)
+                            | ((target_phys_addr_t)(asi & 0xf) << 32));
             break;
         case 8:
-            ret = ldl_phys((target_phys_addr_t)(T0 & ~3)
-                           | ((target_phys_addr_t)(asi & 0xf) << 32));
-            T0 = ldl_phys((target_phys_addr_t)((T0 + 4) & ~3)
+            ret = ldul_phys((target_phys_addr_t)(T0 & ~3)
+                            | ((target_phys_addr_t)(asi & 0xf) << 32));
+            T0 = ldul_phys((target_phys_addr_t)((T0 + 4) & ~3)
                            | ((target_phys_addr_t)(asi & 0xf) << 32));
             break;
         }
@@ -422,7 +422,7 @@ void helper_st_asi(int asi, int size)
             uint32_t src = T1 & ~3, dst = T0 & ~3, temp;
 
             for (i = 0; i < 32; i += 4, src += 4, dst += 4) {
-                temp = ldl_kernel(src);
+                temp = ldul_kernel(src);
                 stl_kernel(dst, temp);
             }
         }
@@ -525,7 +525,7 @@ void helper_ld_asi(int asi, int size, in
                 ret = lduw_raw(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_raw(T0 & ~3);
+                ret = ldul_raw(T0 & ~3);
                 break;
             default:
             case 8:
@@ -673,7 +673,7 @@ void helper_ld_asi(int asi, int size, in
                 ret = lduw_kernel(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_kernel(T0 & ~3);
+                ret = ldul_kernel(T0 & ~3);
                 break;
             default:
             case 8:
@@ -689,7 +689,7 @@ void helper_ld_asi(int asi, int size, in
                 ret = lduw_user(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_user(T0 & ~3);
+                ret = ldul_user(T0 & ~3);
                 break;
             default:
             case 8:
@@ -711,7 +711,7 @@ void helper_ld_asi(int asi, int size, in
                 ret = lduw_phys(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_phys(T0 & ~3);
+                ret = ldul_phys(T0 & ~3);
                 break;
             default:
             case 8:
@@ -1497,6 +1497,21 @@ static void do_unaligned_access(target_u
 #define ALIGNED_ONLY
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -1508,6 +1523,7 @@ static void do_unaligned_access(target_u
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 static void do_unaligned_access(target_ulong addr, int is_write, int is_user,
                                 void *retaddr)
Index: target-sparc/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/op_mem.h,v
retrieving revision 1.10
diff -u -d -d -p -r1.10 op_mem.h
--- target-sparc/op_mem.h	21 Sep 2007 19:10:53 -0000	1.10
+++ target-sparc/op_mem.h	13 Oct 2007 22:00:12 -0000
@@ -17,7 +17,7 @@ void OPPROTO glue(glue(op_, name), MEMSU
     glue(op, MEMSUFFIX)(T0, T1);                                      \
 }
 
-SPARC_LD_OP(ld, ldl);
+SPARC_LD_OP(ld, ldul);
 SPARC_LD_OP(ldub, ldub);
 SPARC_LD_OP(lduh, lduw);
 SPARC_LD_OP_S(ldsb, ldsb);
@@ -42,15 +42,15 @@ void OPPROTO glue(op_ldstub, MEMSUFFIX)(
 
 void OPPROTO glue(op_swap, MEMSUFFIX)(void)
 {
-    target_ulong tmp = glue(ldl, MEMSUFFIX)(T0);
+    target_ulong tmp = glue(ldul, MEMSUFFIX)(T0);
     glue(stl, MEMSUFFIX)(T0, T1);       /* XXX: Should be Atomically */
     T1 = tmp;
 }
 
 void OPPROTO glue(op_ldd, MEMSUFFIX)(void)
 {
-    T1 = glue(ldl, MEMSUFFIX)(T0);
-    T0 = glue(ldl, MEMSUFFIX)((T0 + 4));
+    T1 = glue(ldul, MEMSUFFIX)(T0);
+    T0 = glue(ldul, MEMSUFFIX)((T0 + 4));
 }
 
 /***                         Floating-point store                          ***/
@@ -78,12 +78,12 @@ void OPPROTO glue(op_lddf, MEMSUFFIX) (v
 #ifdef TARGET_SPARC64
 void OPPROTO glue(op_lduw, MEMSUFFIX)(void)
 {
-    T1 = (uint64_t)(glue(ldl, MEMSUFFIX)(T0) & 0xffffffff);
+    T1 = (uint64_t)(glue(ldul, MEMSUFFIX)(T0) & 0xffffffff);
 }
 
 void OPPROTO glue(op_ldsw, MEMSUFFIX)(void)
 {
-    T1 = (int64_t)(glue(ldl, MEMSUFFIX)(T0) & 0xffffffff);
+    T1 = (int64_t)(int32_t)glue(ldul, MEMSUFFIX)(T0);
 }
 
 SPARC_LD_OP(ldx, ldq);
Index: target-sparc/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/translate.c,v
retrieving revision 1.74
diff -u -d -d -p -r1.74 translate.c
--- target-sparc/translate.c	10 Oct 2007 19:11:54 -0000	1.74
+++ target-sparc/translate.c	13 Oct 2007 22:00:13 -0000
@@ -1089,7 +1089,7 @@ static void disas_sparc_insn(DisasContex
 {
     unsigned int insn, opc, rs1, rs2, rd;
 
-    insn = ldl_code(dc->pc);
+    insn = ldul_code(dc->pc);
     opc = GET_FIELD(insn, 0, 1);
 
     rd = GET_FIELD(insn, 2, 6);

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] RFC: reverse-endian softmmu memory accessors
  2007-10-13 22:07         ` J. Mayer
@ 2007-10-13 22:53           ` Thiemo Seufer
  2007-10-14  8:19           ` Blue Swirl
  1 sibling, 0 replies; 20+ messages in thread
From: Thiemo Seufer @ 2007-10-13 22:53 UTC (permalink / raw)
  To: J. Mayer; +Cc: qemu-devel

J. Mayer wrote:
> On Sat, 2007-10-13 at 16:17 +0200, J. Mayer wrote:
> > On Sat, 2007-10-13 at 16:07 +0300, Blue Swirl wrote:
> > > On 10/13/07, J. Mayer <l_indien@magic.fr> wrote:
> > > > On Sat, 2007-10-13 at 13:47 +0300, Blue Swirl wrote:
> > > > > On 10/13/07, J. Mayer <l_indien@magic.fr> wrote:
> > > > > > The problem:
> > > > > > some CPU architectures, namely PowerPC and maybe others, offers
> > > > > > facilities to access the memory or I/O in the reverse endianness, ie
> > > > > > little-endian instead of big-endian for PowerPC, or provide instruction
> > > > > > to make memory accesses in the "reverse-endian". This is implemented as
> > > > > > a global flag on some CPU. This case is already handled by the PowerPC
> > > > > > emulation but is is far from being optimal. Some other implementations
> > > > > > allow the OS to store an "reverse-endian" flag in the TLB or the segment
> > > > > > descriptors, thus providing per-page or per-segment endianness control.
> > > > > > This is mostly used to ease driver migration from a PC platform to
> > > > > > PowerPC without taking any care of the device endianness in the driver
> > > > > > code (yes, this is bad...).
> > > > >
> > > > > Nice, this may be useful for Sparc64. It has a global CPU flag for
> > > > > endianness, individual pages can be marked as reverse endian, and
> > > > > finally there are instructions that access memory in reverse endian.
> > > > > The end result is a XOR of all these reverses. Though I don't know if
> > > > > any of these features are used at all.
> > > >
> > > > I realized that I/O accesses for reverse-endian pages were not correct
> > > > in the softmmu_template.h header. This new version fixes this. It also
> > > > remove duplicated code in the case of unaligned accesses in a
> > > > reverse-endian page.
> > > 
> > > I think 64 bit access case is not handled correctly, but to solve that
> > > it would be nice to extend the current IO access system to 64 bits.
> > 
> > I think that if it was previously correct, it should still be, but... I
> > don't know how much having 64 bits I/O accesses is interresting, as I
> > don't know if there are real hw buses that have 64 bits data path...
> > 
> > Here's another version taking care of your remark about ldl memory
> > accessors.
> > * I replaced all ldl occurences with ldul
> > * when TARGET_LONG_BITS == 64, I also added ldsl accessors. And I
> > started using it in the PowerPC memory access micro-ops.
> > Then the patch is really more invasive than the previous ones.
> > This still does not break PowerPC or i386 target, as it seems.
> 
> Here's a new version. The only change is that, for consistency, I did
> add the big-endian and little-endian accessors that were documented in
> cpu-all.h as unimplemented. The implementation is quite trivial, having
> native and reverse-endian accessors available, and changes functionnally
> nothing to the previous version.

The previous version works fine here, FWIW.


Thiemo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] RFC: reverse-endian softmmu memory accessors
  2007-10-13 22:07         ` J. Mayer
  2007-10-13 22:53           ` Thiemo Seufer
@ 2007-10-14  8:19           ` Blue Swirl
  2007-10-14 10:14             ` J. Mayer
  1 sibling, 1 reply; 20+ messages in thread
From: Blue Swirl @ 2007-10-14  8:19 UTC (permalink / raw)
  To: qemu-devel

On 10/14/07, J. Mayer <l_indien@magic.fr> wrote:
> On Sat, 2007-10-13 at 16:17 +0200, J. Mayer wrote:
> > On Sat, 2007-10-13 at 16:07 +0300, Blue Swirl wrote:
> > > On 10/13/07, J. Mayer <l_indien@magic.fr> wrote:
> > > > On Sat, 2007-10-13 at 13:47 +0300, Blue Swirl wrote:
> > > > > On 10/13/07, J. Mayer <l_indien@magic.fr> wrote:
> > > > > > The problem:
> > > > > > some CPU architectures, namely PowerPC and maybe others, offers
> > > > > > facilities to access the memory or I/O in the reverse endianness, ie
> > > > > > little-endian instead of big-endian for PowerPC, or provide instruction
> > > > > > to make memory accesses in the "reverse-endian". This is implemented as
> > > > > > a global flag on some CPU. This case is already handled by the PowerPC
> > > > > > emulation but is is far from being optimal. Some other implementations
> > > > > > allow the OS to store an "reverse-endian" flag in the TLB or the segment
> > > > > > descriptors, thus providing per-page or per-segment endianness control.
> > > > > > This is mostly used to ease driver migration from a PC platform to
> > > > > > PowerPC without taking any care of the device endianness in the driver
> > > > > > code (yes, this is bad...).
> > > > >
> > > > > Nice, this may be useful for Sparc64. It has a global CPU flag for
> > > > > endianness, individual pages can be marked as reverse endian, and
> > > > > finally there are instructions that access memory in reverse endian.
> > > > > The end result is a XOR of all these reverses. Though I don't know if
> > > > > any of these features are used at all.
> > > >
> > > > I realized that I/O accesses for reverse-endian pages were not correct
> > > > in the softmmu_template.h header. This new version fixes this. It also
> > > > remove duplicated code in the case of unaligned accesses in a
> > > > reverse-endian page.
> > >
> > > I think 64 bit access case is not handled correctly, but to solve that
> > > it would be nice to extend the current IO access system to 64 bits.
> >
> > I think that if it was previously correct, it should still be, but... I
> > don't know how much having 64 bits I/O accesses is interresting, as I
> > don't know if there are real hw buses that have 64 bits data path...
> >
> > Here's another version taking care of your remark about ldl memory
> > accessors.
> > * I replaced all ldl occurences with ldul
> > * when TARGET_LONG_BITS == 64, I also added ldsl accessors. And I
> > started using it in the PowerPC memory access micro-ops.
> > Then the patch is really more invasive than the previous ones.
> > This still does not break PowerPC or i386 target, as it seems.
>
> Here's a new version. The only change is that, for consistency, I did
> add the big-endian and little-endian accessors that were documented in
> cpu-all.h as unimplemented. The implementation is quite trivial, having
> native and reverse-endian accessors available, and changes functionnally
> nothing to the previous version.

The patch does not apply anymore. The Sparc part looks OK.

The benefits from the patch can be gained by mapping Sparc64 lduw and
ldsw in op_mem.h  directly to ldul and ldsl using SPARC_LD_OP and
replacing the ldl+bswap etc. for the LE cases with ldlr in
op_helper.c. If you prefer, I can do this after you have applied the
patch.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] RFC: reverse-endian softmmu memory accessors
  2007-10-14  8:19           ` Blue Swirl
@ 2007-10-14 10:14             ` J. Mayer
  2007-10-14 13:22               ` Thiemo Seufer
  0 siblings, 1 reply; 20+ messages in thread
From: J. Mayer @ 2007-10-14 10:14 UTC (permalink / raw)
  To: qemu-devel

On Sun, 2007-10-14 at 11:19 +0300, Blue Swirl wrote:
> On 10/14/07, J. Mayer <l_indien@magic.fr> wrote:
> > On Sat, 2007-10-13 at 16:17 +0200, J. Mayer wrote:
> > > On Sat, 2007-10-13 at 16:07 +0300, Blue Swirl wrote:
> > > > On 10/13/07, J. Mayer <l_indien@magic.fr> wrote:
> > > > > On Sat, 2007-10-13 at 13:47 +0300, Blue Swirl wrote:
> > > > > > On 10/13/07, J. Mayer <l_indien@magic.fr> wrote:
> > > > > > > The problem:
> > > > > > > some CPU architectures, namely PowerPC and maybe others, offers
> > > > > > > facilities to access the memory or I/O in the reverse endianness, ie
> > > > > > > little-endian instead of big-endian for PowerPC, or provide instruction
> > > > > > > to make memory accesses in the "reverse-endian". This is implemented as
> > > > > > > a global flag on some CPU. This case is already handled by the PowerPC
> > > > > > > emulation but is is far from being optimal. Some other implementations
> > > > > > > allow the OS to store an "reverse-endian" flag in the TLB or the segment
> > > > > > > descriptors, thus providing per-page or per-segment endianness control.
> > > > > > > This is mostly used to ease driver migration from a PC platform to
> > > > > > > PowerPC without taking any care of the device endianness in the driver
> > > > > > > code (yes, this is bad...).
> > > > > >
> > > > > > Nice, this may be useful for Sparc64. It has a global CPU flag for
> > > > > > endianness, individual pages can be marked as reverse endian, and
> > > > > > finally there are instructions that access memory in reverse endian.
> > > > > > The end result is a XOR of all these reverses. Though I don't know if
> > > > > > any of these features are used at all.
> > > > >
> > > > > I realized that I/O accesses for reverse-endian pages were not correct
> > > > > in the softmmu_template.h header. This new version fixes this. It also
> > > > > remove duplicated code in the case of unaligned accesses in a
> > > > > reverse-endian page.
> > > >
> > > > I think 64 bit access case is not handled correctly, but to solve that
> > > > it would be nice to extend the current IO access system to 64 bits.
> > >
> > > I think that if it was previously correct, it should still be, but... I
> > > don't know how much having 64 bits I/O accesses is interresting, as I
> > > don't know if there are real hw buses that have 64 bits data path...
> > >
> > > Here's another version taking care of your remark about ldl memory
> > > accessors.
> > > * I replaced all ldl occurences with ldul
> > > * when TARGET_LONG_BITS == 64, I also added ldsl accessors. And I
> > > started using it in the PowerPC memory access micro-ops.
> > > Then the patch is really more invasive than the previous ones.
> > > This still does not break PowerPC or i386 target, as it seems.
> >
> > Here's a new version. The only change is that, for consistency, I did
> > add the big-endian and little-endian accessors that were documented in
> > cpu-all.h as unimplemented. The implementation is quite trivial, having
> > native and reverse-endian accessors available, and changes functionnally
> > nothing to the previous version.
> 
> The patch does not apply anymore. The Sparc part looks OK.
> 
> The benefits from the patch can be gained by mapping Sparc64 lduw and
> ldsw in op_mem.h  directly to ldul and ldsl using SPARC_LD_OP and
> replacing the ldl+bswap etc. for the LE cases with ldlr in
> op_helper.c. If you prefer, I can do this after you have applied the
> patch.

Yes, there are conflicts between this patch and the mmu_idx one I just
committed. I will regenerate an updated diff in the hours to come, after
I finished committing the PowerPC fixes and improvements I got waiting in
stock.
For the Sparc improvements, as I merged the PowerPC improvements in the
patch, I think it can be a good idea to include it directly in the
patch.
I'm also wondering if it would not be a good idea to define lduq/ldsq
even if they in fact do exactly what ldq does now, just to have a fully
consistent API.

-- 
J. Mayer <l_indien@magic.fr>
Never organized

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] RFC: reverse-endian softmmu memory accessors
  2007-10-14 10:14             ` J. Mayer
@ 2007-10-14 13:22               ` Thiemo Seufer
  2007-10-15 11:55                 ` J. Mayer
  0 siblings, 1 reply; 20+ messages in thread
From: Thiemo Seufer @ 2007-10-14 13:22 UTC (permalink / raw)
  To: J. Mayer; +Cc: qemu-devel

J. Mayer wrote:
[snip]
> > > Here's a new version. The only change is that, for consistency, I did
> > > add the big-endian and little-endian accessors that were documented in
> > > cpu-all.h as unimplemented. The implementation is quite trivial, having
> > > native and reverse-endian accessors available, and changes functionnally
> > > nothing to the previous version.
> > 
> > The patch does not apply anymore. The Sparc part looks OK.
> > 
> > The benefits from the patch can be gained by mapping Sparc64 lduw and
> > ldsw in op_mem.h  directly to ldul and ldsl using SPARC_LD_OP and
> > replacing the ldl+bswap etc. for the LE cases with ldlr in
> > op_helper.c. If you prefer, I can do this after you have applied the
> > patch.
> 
> Yes, there are conflicts between this patch and the mmu_idx one I just
> > committed. I will regenerate an updated diff in the hours to come, after
> > I finished committing the PowerPC fixes and improvements I got waiting in
> > stock.
> > For the Sparc improvements, as I merged the PowerPC improvements in the
> patch, I think it can be a good idea to include it directly in the
> patch.
> I'm also wondering if it would not be a good idea to define lduq/ldsq
> even if they in fact do exactly what ldq does now, just to have a fully
> consistent API.

Some architecture specs mention the possibility of 128 bit integers, so
this sounds like a good idea.


Thiemo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] RFC: reverse-endian softmmu memory accessors
  2007-10-14 13:22               ` Thiemo Seufer
@ 2007-10-15 11:55                 ` J. Mayer
  0 siblings, 0 replies; 20+ messages in thread
From: J. Mayer @ 2007-10-15 11:55 UTC (permalink / raw)
  To: Thiemo Seufer; +Cc: qemu-devel

On Sun, 2007-10-14 at 14:22 +0100, Thiemo Seufer wrote:
> J. Mayer wrote:
> [snip]
> > > > Here's a new version. The only change is that, for consistency, I did
> > > > add the big-endian and little-endian accessors that were documented in
> > > > cpu-all.h as unimplemented. The implementation is quite trivial, having
> > > > native and reverse-endian accessors available, and changes functionnally
> > > > nothing to the previous version.
> > > 
> > > The patch does not apply anymore. The Sparc part looks OK.
> > > 
> > > The benefits from the patch can be gained by mapping Sparc64 lduw and
> > > ldsw in op_mem.h  directly to ldul and ldsl using SPARC_LD_OP and
> > > replacing the ldl+bswap etc. for the LE cases with ldlr in
> > > op_helper.c. If you prefer, I can do this after you have applied the
> > > patch.
> > 
> > Yes, there are conflicts between this patch and the mmu_idx one I just
> > > committed. I will regenerate an updated diff in the hours to come, after
> > > I finished committing the PowerPC fixes and improvements I got waiting in
> > > stock.
> > > For the Sparc improvements, as I merged the PowerPC improvements in the
> > patch, I think it can be a good idea to include it directly in the
> > patch.
> > I'm also wondering if it would not be a good idea to define lduq/ldsq
> > even if they in fact do exactly what ldq does now, just to have a fully
> > consistent API.
> 
> Some architecture specs mention the possibility of 128 bit integers, so
> this sounds like a good idea.

OK, then I'll add this.
And I guess we can avoid the #if (TARGET_LONG_BITS == 64) for ldsl /
ldul by changing the return type to target_ulong for those accessors.

-- 
J. Mayer <l_indien@magic.fr>
Never organized

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] RFC: reverse-endian softmmu memory accessors
  2007-10-16 20:27         ` J. Mayer
@ 2007-11-23 12:55           ` Tero Kaarlela
  0 siblings, 0 replies; 20+ messages in thread
From: Tero Kaarlela @ 2007-11-23 12:55 UTC (permalink / raw)
  To: qemu-devel

Hi,

    Did this patch ever go to CVS?


Tero


J. Mayer wrote:
> On Mon, 2007-10-15 at 20:45 +0300, Blue Swirl wrote:
>   
>> On 10/15/07, Blue Swirl <blauwirbel@gmail.com> wrote:
>>     
>>> On 10/15/07, J. Mayer <l_indien@magic.fr> wrote:
>>>       
>>>> On Sun, 2007-10-14 at 15:59 +0300, Blue Swirl wrote:
>>>>         
>>>>> On 10/14/07, J. Mayer <l_indien@magic.fr> wrote:
>>>>>           
>>>>>> Here's an updated version of the patch against current CVS.
>>>>>> This patch provides reverse-endian, little-endian and big-endian
>>>>>> memory accessors, available with and without softmmu. It also provides
>>>>>> an IO_MEM_REVERSE TLB flag to allow future support of per-page
>>>>>> endianness control, which is required by some target CPU emulations.
>>>>>> Having reverse-endian memory accessors also makes it possible to optimise
>>>>>> reverse-endian memory access when the target CPU has dedicated
>>>>>> instructions. For now, it includes optimisations for the PowerPC target.
>>>>>>             
>>>>> This breaks Sparc32 softmmu, I get a black screen. Your changes to
>>>>> target-sparc and hw/sun4m.c look fine, so the problem could be in IO?
>>>>>           
>>>> Did it work before my commits? I may have done something wrong during
>>>> the merge...
>>>> I will do more checks and more tests...
>>>>         
>>> If I disable the IOSWAP code, black screen is gone. I think this is
>>> logical: the io accessors return host CPU values, therefore no byte
>>> swapping needs to be performed.
>>>
>>> The attached version works for me.
>>>       
>> This patch takes the reverse endian functions into use for Sparc.
>>
>> I added hypervisor versions of the functions. This is getting a bit
>> ugly, time for #include magic? Physical versions could be useful too.
>>     
>
> My first reaction is to say that it seems pointless to define hypervisor
> access routines when emulating user-mode only. It seems more likely to
> me that any attempt to do such an access in user-mode code would raise a
> privilege exception. If such an access is really possible for any
> reason, I think you're right, it might be time to have a template
> header, called the same way the softmmu_template currently is.
> Then, I tried to go this way, adding a "nommu_header.h" and a
> "nommu_template.h" files.
> I also completed the API, replacing ldq with lduq and adding ldsq
> accessors.
> Here's the updated patch, including the Sparc specific reverse-endian
> changes.
> I did test PowerPC, PowerPC64, i386 softmmu cases and PowerPC linux-user
> case. I also successfully launched the Sparc, ARM and Coldfire test
> images available from the Qemu page. I did not succeed with the Mips test
> case, but the problem comes during the initialisation, before the
> emulation starts, failing to load the initrd. It seems to me that the
> problem comes from the fact I'm running on a 64 bits host: it seems to
> run OK when launched in 32 bits mode but not in 64 bits mode, using clean
> CVS sources. I will check this and propose a fix, if needed.
> There is still at least one bug in that patch: the reverse-endian I/O
> case, in cpu_physical_memory_rw (exec.c) is not handled and is to be
> fixed.
>
>   
> ------------------------------------------------------------------------
>
> ? pc-bios/mips_bios.bin
> Index: cpu-all.h
> ===================================================================
> RCS file: /sources/qemu/qemu/cpu-all.h,v
> retrieving revision 1.76
> diff -u -d -d -p -r1.76 cpu-all.h
> --- cpu-all.h	23 Sep 2007 15:28:03 -0000	1.76
> +++ cpu-all.h	16 Oct 2007 11:39:03 -0000
> @@ -149,7 +149,7 @@ typedef union {
>   *   f    : float access
>   *
>   * sign is:
> - * (empty): for floats or 32 bit size
> + * (empty): for floats
>   *   u    : unsigned
>   *   s    : signed
>   *
> @@ -161,9 +161,9 @@ typedef union {
>   *
>   * endian is:
>   * (empty): target cpu endianness or 8 bit access
> - *   r    : reversed target cpu endianness (not implemented yet)
> - *   be   : big endian (not implemented yet)
> - *   le   : little endian (not implemented yet)
> + *   r    : reversed target cpu endianness
> + *   be   : big endian
> + *   le   : little endian
>   *
>   * access_type is:
>   *   raw    : host memory access
> @@ -215,24 +215,45 @@ static inline int ldsw_le_p(void *ptr)
>  #endif
>  }
>  
> -static inline int ldl_le_p(void *ptr)
> +static inline target_long ldul_le_p(void *ptr)
>  {
>  #ifdef __powerpc__
>      int val;
>      __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr));
> -    return val;
> +    return (uint32_t)val;
>  #else
>      uint8_t *p = ptr;
>      return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
>  #endif
>  }
>  
> -static inline uint64_t ldq_le_p(void *ptr)
> +static inline target_long ldsl_le_p(void *ptr)
> +{
> +#ifdef __powerpc__
> +    int val;
> +    __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr));
> +    return (int32_t)val;
> +#else
> +    uint8_t *p = ptr;
> +    return (int32_t)(p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24));
> +#endif
> +}
> +
> +static inline uint64_t lduq_le_p(void *ptr)
>  {
>      uint8_t *p = ptr;
>      uint32_t v1, v2;
> -    v1 = ldl_le_p(p);
> -    v2 = ldl_le_p(p + 4);
> +    v1 = ldul_le_p(p);
> +    v2 = ldul_le_p(p + 4);
> +    return v1 | ((uint64_t)v2 << 32);
> +}
> +
> +static inline int64_t ldsq_le_p(void *ptr)
> +{
> +    uint8_t *p = ptr;
> +    uint32_t v1, v2;
> +    v1 = ldul_le_p(p);
> +    v2 = ldul_le_p(p + 4);
>      return v1 | ((uint64_t)v2 << 32);
>  }
>  
> @@ -275,7 +296,7 @@ static inline float32 ldfl_le_p(void *pt
>          float32 f;
>          uint32_t i;
>      } u;
> -    u.i = ldl_le_p(ptr);
> +    u.i = ldul_le_p(ptr);
>      return u.f;
>  }
>  
> @@ -292,8 +313,8 @@ static inline void stfl_le_p(void *ptr, 
>  static inline float64 ldfq_le_p(void *ptr)
>  {
>      CPU_DoubleU u;
> -    u.l.lower = ldl_le_p(ptr);
> -    u.l.upper = ldl_le_p(ptr + 4);
> +    u.l.lower = ldul_le_p(ptr);
> +    u.l.upper = ldul_le_p(ptr + 4);
>      return u.d;
>  }
>  
> @@ -317,16 +338,26 @@ static inline int ldsw_le_p(void *ptr)
>      return *(int16_t *)ptr;
>  }
>  
> -static inline int ldl_le_p(void *ptr)
> +static inline target_long ldul_le_p(void *ptr)
>  {
>      return *(uint32_t *)ptr;
>  }
>  
> -static inline uint64_t ldq_le_p(void *ptr)
> +static inline target_long ldsl_le_p(void *ptr)
> +{
> +    return *(int32_t *)ptr;
> +}
> +
> +static inline uint64_t lduq_le_p(void *ptr)
>  {
>      return *(uint64_t *)ptr;
>  }
>  
> +static inline int64_t ldsq_le_p(void *ptr)
> +{
> +    return *(int64_t *)ptr;
> +}
> +
>  static inline void stw_le_p(void *ptr, int v)
>  {
>      *(uint16_t *)ptr = v;
> @@ -397,7 +428,7 @@ static inline int ldsw_be_p(void *ptr)
>  #endif
>  }
>  
> -static inline int ldl_be_p(void *ptr)
> +static inline target_long ldul_be_p(void *ptr)
>  {
>  #if defined(__i386__) || defined(__x86_64__)
>      int val;
> @@ -405,18 +436,41 @@ static inline int ldl_be_p(void *ptr)
>                    "bswap %0\n"
>                    : "=r" (val)
>                    : "m" (*(uint32_t *)ptr));
> -    return val;
> +    return (uint32_t)val;
>  #else
>      uint8_t *b = (uint8_t *) ptr;
>      return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
>  #endif
>  }
>  
> -static inline uint64_t ldq_be_p(void *ptr)
> +static inline target_long ldsl_be_p(void *ptr)
> +{
> +#if defined(__i386__) || defined(__x86_64__)
> +    int val;
> +    asm volatile ("movl %1, %0\n"
> +                  "bswap %0\n"
> +                  : "=r" (val)
> +                  : "m" (*(uint32_t *)ptr));
> +    return (int32_t)val;
> +#else
> +    uint8_t *b = (uint8_t *) ptr;
> +    return (int32_t)((b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]);
> +#endif
> +}
> +
> +static inline uint64_t lduq_be_p(void *ptr)
>  {
>      uint32_t a,b;
> -    a = ldl_be_p(ptr);
> -    b = ldl_be_p(ptr+4);
> +    a = ldul_be_p(ptr);
> +    b = ldul_be_p(ptr+4);
> +    return (((uint64_t)a<<32)|b);
> +}
> +
> +static inline int64_t ldsq_be_p(void *ptr)
> +{
> +    uint32_t a,b;
> +    a = ldul_be_p(ptr);
> +    b = ldul_be_p(ptr+4);
>      return (((uint64_t)a<<32)|b);
>  }
>  
> @@ -464,7 +518,7 @@ static inline float32 ldfl_be_p(void *pt
>          float32 f;
>          uint32_t i;
>      } u;
> -    u.i = ldl_be_p(ptr);
> +    u.i = ldul_be_p(ptr);
>      return u.f;
>  }
>  
> @@ -481,8 +535,8 @@ static inline void stfl_be_p(void *ptr, 
>  static inline float64 ldfq_be_p(void *ptr)
>  {
>      CPU_DoubleU u;
> -    u.l.upper = ldl_be_p(ptr);
> -    u.l.lower = ldl_be_p(ptr + 4);
> +    u.l.upper = ldul_be_p(ptr);
> +    u.l.lower = ldul_be_p(ptr + 4);
>      return u.d;
>  }
>  
> @@ -506,16 +560,26 @@ static inline int ldsw_be_p(void *ptr)
>      return *(int16_t *)ptr;
>  }
>  
> -static inline int ldl_be_p(void *ptr)
> +static inline target_long ldul_be_p(void *ptr)
>  {
>      return *(uint32_t *)ptr;
>  }
>  
> -static inline uint64_t ldq_be_p(void *ptr)
> +static inline target_long ldsl_be_p(void *ptr)
> +{
> +    return *(int32_t *)ptr;
> +}
> +
> +static inline uint64_t lduq_be_p(void *ptr)
>  {
>      return *(uint64_t *)ptr;
>  }
>  
> +static inline int64_t ldsq_be_p(void *ptr)
> +{
> +    return *(int64_t *)ptr;
> +}
> +
>  static inline void stw_be_p(void *ptr, int v)
>  {
>      *(uint16_t *)ptr = v;
> @@ -557,10 +621,13 @@ static inline void stfq_be_p(void *ptr, 
>  
>  /* target CPU memory access functions */
>  #if defined(TARGET_WORDS_BIGENDIAN)
> +/* native-endian */
>  #define lduw_p(p) lduw_be_p(p)
>  #define ldsw_p(p) ldsw_be_p(p)
> -#define ldl_p(p) ldl_be_p(p)
> -#define ldq_p(p) ldq_be_p(p)
> +#define ldul_p(p) ldul_be_p(p)
> +#define ldsl_p(p) ldsl_be_p(p)
> +#define lduq_p(p) lduq_be_p(p)
> +#define ldsq_p(p) ldsq_be_p(p)
>  #define ldfl_p(p) ldfl_be_p(p)
>  #define ldfq_p(p) ldfq_be_p(p)
>  #define stw_p(p, v) stw_be_p(p, v)
> @@ -568,11 +635,28 @@ static inline void stfq_be_p(void *ptr, 
>  #define stq_p(p, v) stq_be_p(p, v)
>  #define stfl_p(p, v) stfl_be_p(p, v)
>  #define stfq_p(p, v) stfq_be_p(p, v)
> +/* reverse-endian */
> +#define lduwr_p(p) lduw_le_p(p)
> +#define ldswr_p(p) ldsw_le_p(p)
> +#define ldulr_p(p) ldul_le_p(p)
> +#define ldslr_p(p) ldsl_le_p(p)
> +#define lduqr_p(p) lduq_le_p(p)
> +#define ldsqr_p(p) ldsq_le_p(p)
> +#define ldflr_p(p) ldfl_le_p(p)
> +#define ldfqr_p(p) ldfq_le_p(p)
> +#define stwr_p(p, v) stw_le_p(p, v)
> +#define stlr_p(p, v) stl_le_p(p, v)
> +#define stqr_p(p, v) stq_le_p(p, v)
> +#define stflr_p(p, v) stfl_le_p(p, v)
> +#define stfqr_p(p, v) stfq_le_p(p, v)
>  #else
> +/* native-endian */
>  #define lduw_p(p) lduw_le_p(p)
>  #define ldsw_p(p) ldsw_le_p(p)
> -#define ldl_p(p) ldl_le_p(p)
> -#define ldq_p(p) ldq_le_p(p)
> +#define ldul_p(p) ldul_le_p(p)
> +#define ldsl_p(p) ldsl_le_p(p)
> +#define lduq_p(p) lduq_le_p(p)
> +#define ldsq_p(p) ldsq_le_p(p)
>  #define ldfl_p(p) ldfl_le_p(p)
>  #define ldfq_p(p) ldfq_le_p(p)
>  #define stw_p(p, v) stw_le_p(p, v)
> @@ -580,6 +664,20 @@ static inline void stfq_be_p(void *ptr, 
>  #define stq_p(p, v) stq_le_p(p, v)
>  #define stfl_p(p, v) stfl_le_p(p, v)
>  #define stfq_p(p, v) stfq_le_p(p, v)
> +/* reverse-endian */
> +#define lduwr_p(p) lduw_be_p(p)
> +#define ldswr_p(p) ldsw_be_p(p)
> +#define ldulr_p(p) ldul_be_p(p)
> +#define ldslr_p(p) ldsl_be_p(p)
> +#define lduqr_p(p) lduq_be_p(p)
> +#define ldsqr_p(p) ldsq_be_p(p)
> +#define ldflr_p(p) ldfl_be_p(p)
> +#define ldfqr_p(p) ldfq_be_p(p)
> +#define stwr_p(p, v) stw_be_p(p, v)
> +#define stlr_p(p, v) stl_be_p(p, v)
> +#define stqr_p(p, v) stq_be_p(p, v)
> +#define stflr_p(p, v) stfl_be_p(p, v)
> +#define stfqr_p(p, v) stfq_be_p(p, v)
>  #endif
>  
>  /* MMU memory access macros */
> @@ -605,12 +703,15 @@ static inline void stfq_be_p(void *ptr, 
>  #define laddr(x) (uint8_t *)(long)(x)
>  #endif
>  
> +/* native-endian */
>  #define ldub_raw(p) ldub_p(laddr((p)))
>  #define ldsb_raw(p) ldsb_p(laddr((p)))
>  #define lduw_raw(p) lduw_p(laddr((p)))
>  #define ldsw_raw(p) ldsw_p(laddr((p)))
> -#define ldl_raw(p) ldl_p(laddr((p)))
> -#define ldq_raw(p) ldq_p(laddr((p)))
> +#define ldul_raw(p) ldul_p(laddr((p)))
> +#define ldsl_raw(p) ldsl_p(laddr((p)))
> +#define lduq_raw(p) lduq_p(laddr((p)))
> +#define ldsq_raw(p) ldsq_p(laddr((p)))
>  #define ldfl_raw(p) ldfl_p(laddr((p)))
>  #define ldfq_raw(p) ldfq_p(laddr((p)))
>  #define stb_raw(p, v) stb_p(saddr((p)), v)
> @@ -619,47 +720,77 @@ static inline void stfq_be_p(void *ptr, 
>  #define stq_raw(p, v) stq_p(saddr((p)), v)
>  #define stfl_raw(p, v) stfl_p(saddr((p)), v)
>  #define stfq_raw(p, v) stfq_p(saddr((p)), v)
> -
> +/* reverse endian */
> +#define ldubr_raw(p) ldub_p(laddr((p)))
> +#define ldsbr_raw(p) ldsb_p(laddr((p)))
> +#define lduwr_raw(p) lduwr_p(laddr((p)))
> +#define ldswr_raw(p) ldswr_p(laddr((p)))
> +#define ldulr_raw(p) ldulr_p(laddr((p)))
> +#define ldslr_raw(p) ldslr_p(laddr((p)))
> +#define lduqr_raw(p) lduqr_p(laddr((p)))
> +#define ldsqr_raw(p) ldsqr_p(laddr((p)))
> +#define ldflr_raw(p) ldflr_p(laddr((p)))
> +#define ldfqr_raw(p) ldfqr_p(laddr((p)))
> +#define stbr_raw(p, v) stb_p(saddr((p)), v)
> +#define stwr_raw(p, v) stwr_p(saddr((p)), v)
> +#define stlr_raw(p, v) stlr_p(saddr((p)), v)
> +#define stqr_raw(p, v) stqr_p(saddr((p)), v)
> +#define stflr_raw(p, v) stflr_p(saddr((p)), v)
> +#define stfqr_raw(p, v) stfqr_p(saddr((p)), v)
> +/* big-endian accesses */
> +#define SUFFIX _raw
> +#define ESUFFIX _be
> +#define ADDR_TYPE void *
> +#define LADDR_TYPE (void *)
> +#ifdef TARGET_WORDS_BIGENDIAN
> +#define LSUFFIX _raw
> +#else
> +#define LSUFFIX r_raw
> +#endif
> +#include "nommu_template.h"
> +#undef ADDR_TYPE
> +#undef SUFFIX
> +/* little-endian accesses */
> +#define SUFFIX _raw
> +#define ESUFFIX _le
> +#define ADDR_TYPE void *
> +#define LADDR_TYPE (void *)
> +#ifdef TARGET_WORDS_BIGENDIAN
> +#define LSUFFIX r_raw
> +#else
> +#define LSUFFIX _raw
> +#endif
> +#include "nommu_template.h"
> +#undef ADDR_TYPE
> +#undef SUFFIX
>  
>  #if defined(CONFIG_USER_ONLY)
>  
> -/* if user mode, no other memory access functions */
> -#define ldub(p) ldub_raw(p)
> -#define ldsb(p) ldsb_raw(p)
> -#define lduw(p) lduw_raw(p)
> -#define ldsw(p) ldsw_raw(p)
> -#define ldl(p) ldl_raw(p)
> -#define ldq(p) ldq_raw(p)
> -#define ldfl(p) ldfl_raw(p)
> -#define ldfq(p) ldfq_raw(p)
> -#define stb(p, v) stb_raw(p, v)
> -#define stw(p, v) stw_raw(p, v)
> -#define stl(p, v) stl_raw(p, v)
> -#define stq(p, v) stq_raw(p, v)
> -#define stfl(p, v) stfl_raw(p, v)
> -#define stfq(p, v) stfq_raw(p, v)
> -
> -#define ldub_code(p) ldub_raw(p)
> -#define ldsb_code(p) ldsb_raw(p)
> -#define lduw_code(p) lduw_raw(p)
> -#define ldsw_code(p) ldsw_raw(p)
> -#define ldl_code(p) ldl_raw(p)
> -#define ldq_code(p) ldq_raw(p)
> -
> -#define ldub_kernel(p) ldub_raw(p)
> -#define ldsb_kernel(p) ldsb_raw(p)
> -#define lduw_kernel(p) lduw_raw(p)
> -#define ldsw_kernel(p) ldsw_raw(p)
> -#define ldl_kernel(p) ldl_raw(p)
> -#define ldq_kernel(p) ldq_raw(p)
> -#define ldfl_kernel(p) ldfl_raw(p)
> -#define ldfq_kernel(p) ldfq_raw(p)
> -#define stb_kernel(p, v) stb_raw(p, v)
> -#define stw_kernel(p, v) stw_raw(p, v)
> -#define stl_kernel(p, v) stl_raw(p, v)
> -#define stq_kernel(p, v) stq_raw(p, v)
> -#define stfl_kernel(p, v) stfl_raw(p, v)
> -#define stfq_kernel(p, vt) stfq_raw(p, v)
> +#define SUFFIX
> +#define ADDR_TYPE unsigned long
> +#include "nommu_header.h"
> +#define SUFFIX _code
> +#define ADDR_TYPE unsigned long
> +#include "nommu_header.h"
> +#define SUFFIX MMU_MODE0_SUFFIX
> +#define ADDR_TYPE unsigned long
> +#include "nommu_header.h"
> +#define SUFFIX MMU_MODE1_SUFFIX
> +#define ADDR_TYPE unsigned long
> +#include "nommu_header.h"
> +#if (NB_MMU_MODES >= 3)
> +#define SUFFIX MMU_MODE2_SUFFIX
> +#define ADDR_TYPE unsigned long
> +#include "nommu_header.h"
> +#if (NB_MMU_MODES >= 4)
> +#define SUFFIX MMU_MODE3_SUFFIX
> +#define ADDR_TYPE unsigned long
> +#include "nommu_header.h"
> +#if (NB_MMU_MODES > 4)
> +#error "NB_MMU_MODES > 4 is not supported for now"
> +#endif /* (NB_MMU_MODES > 4) */
> +#endif /* (NB_MMU_MODES >= 4) */
> +#endif /* (NB_MMU_MODES >= 3) */
>  
>  #endif /* defined(CONFIG_USER_ONLY) */
>  
> @@ -790,6 +921,8 @@ extern uint8_t *phys_ram_dirty;
>     the physical address */
>  #define IO_MEM_ROMD        (1)
>  #define IO_MEM_SUBPAGE     (2)
> +/* On some target CPUs, endianness is stored in page tables */
> +#define IO_MEM_REVERSE     (4)
>  
>  typedef void CPUWriteMemoryFunc(void *opaque, target_phys_addr_t addr, uint32_t value);
>  typedef uint32_t CPUReadMemoryFunc(void *opaque, target_phys_addr_t addr);
> @@ -821,8 +954,8 @@ static inline void cpu_physical_memory_w
>  }
>  uint32_t ldub_phys(target_phys_addr_t addr);
>  uint32_t lduw_phys(target_phys_addr_t addr);
> -uint32_t ldl_phys(target_phys_addr_t addr);
> -uint64_t ldq_phys(target_phys_addr_t addr);
> +uint32_t ldul_phys(target_phys_addr_t addr);
> +uint64_t lduq_phys(target_phys_addr_t addr);
>  void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val);
>  void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val);
>  void stb_phys(target_phys_addr_t addr, uint32_t val);
> Index: cpu-exec.c
> ===================================================================
> RCS file: /sources/qemu/qemu/cpu-exec.c,v
> retrieving revision 1.120
> diff -u -d -d -p -r1.120 cpu-exec.c
> --- cpu-exec.c	14 Oct 2007 07:07:04 -0000	1.120
> +++ cpu-exec.c	16 Oct 2007 11:39:03 -0000
> @@ -436,12 +436,12 @@ int cpu_exec(CPUState *env1)
>                           /* FIXME: this should respect TPR */
>                           env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
>                           svm_check_intercept(SVM_EXIT_VINTR);
> -                         intno = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector));
> +                         intno = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector));
>                           if (loglevel & CPU_LOG_TB_IN_ASM)
>                               fprintf(logfile, "Servicing virtual hardware INT=0x%02x\n", intno);
>  	                 do_interrupt(intno, 0, 0, -1, 1);
>                           stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl),
> -                                  ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)) & ~V_IRQ_MASK);
> +                                  ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)) & ~V_IRQ_MASK);
>  #if defined(__sparc__) && !defined(HOST_SOLARIS)
>                           tmp_T0 = 0;
>  #else
> Index: exec-all.h
> ===================================================================
> RCS file: /sources/qemu/qemu/exec-all.h,v
> retrieving revision 1.68
> diff -u -d -d -p -r1.68 exec-all.h
> --- exec-all.h	14 Oct 2007 07:07:04 -0000	1.68
> +++ exec-all.h	16 Oct 2007 11:39:03 -0000
> @@ -569,6 +569,21 @@ void tlb_fill(target_ulong addr, int is_
>  #define MEMSUFFIX _code
>  #define env cpu_single_env
>  
> +/* native-endian */
> +#define DATA_SIZE 1
> +#include "softmmu_header.h"
> +
> +#define DATA_SIZE 2
> +#include "softmmu_header.h"
> +
> +#define DATA_SIZE 4
> +#include "softmmu_header.h"
> +
> +#define DATA_SIZE 8
> +#include "softmmu_header.h"
> +
> +/* reverse-endian */
> +#define REVERSE_ENDIAN
>  #define DATA_SIZE 1
>  #include "softmmu_header.h"
>  
> @@ -580,6 +595,7 @@ void tlb_fill(target_ulong addr, int is_
>  
>  #define DATA_SIZE 8
>  #include "softmmu_header.h"
> +#undef REVERSE_ENDIAN
>  
>  #undef ACCESS_TYPE
>  #undef MEMSUFFIX
> Index: exec.c
> ===================================================================
> RCS file: /sources/qemu/qemu/exec.c,v
> retrieving revision 1.109
> diff -u -d -d -p -r1.109 exec.c
> --- exec.c	14 Oct 2007 07:07:04 -0000	1.109
> +++ exec.c	16 Oct 2007 11:39:03 -0000
> @@ -2202,7 +2202,7 @@ static uint32_t watch_mem_readw(void *op
>  
>  static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
>  {
> -    return ldl_phys(addr);
> +    return ldul_phys(addr);
>  }
>  
>  /* Generate a debug exception if a watchpoint has been hit.
> @@ -2507,7 +2507,7 @@ void cpu_physical_memory_rw(target_phys_
>      uint8_t *ptr;
>      uint32_t val;
>      target_phys_addr_t page;
> -    unsigned long pd;
> +    unsigned long pd, addr1;
>      PhysPageDesc *p;
>  
>      while (len > 0) {
> @@ -2524,31 +2524,54 @@ void cpu_physical_memory_rw(target_phys_
>  
>          if (is_write) {
>              if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
> -                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
> -                /* XXX: could force cpu_single_env to NULL to avoid
> -                   potential bugs */
> -                if (l >= 4 && ((addr & 3) == 0)) {
> -                    /* 32 bit write access */
> -                    val = ldl_p(buf);
> -                    io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
> -                    l = 4;
> -                } else if (l >= 2 && ((addr & 1) == 0)) {
> -                    /* 16 bit write access */
> -                    val = lduw_p(buf);
> -                    io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
> -                    l = 2;
> +                if (pd & IO_MEM_REVERSE) {
> +                    /* Specific case for reverse endian page write */
> +                    addr1 = (pd & TARGET_PAGE_MASK) +
> +                        (addr & ~TARGET_PAGE_MASK);
> +                    ptr = phys_ram_base + addr1;
> +                    for (; l >= 4; l -= 4) {
> +                        stlr_p(ptr, *(uint32_t *)buf);
> +                        ptr += 4;
> +                        buf += 4;
> +                    }
> +                    for (; l >= 2; l -= 2) {
> +                        stwr_p(ptr, *(uint16_t *)buf);
> +                        ptr += 2;
> +                        buf += 2;
> +                    }
> +                    if (l >= 1)
> +                        *ptr = *buf;
> +                    goto invalidate_code;
>                  } else {
> -                    /* 8 bit write access */
> -                    val = ldub_p(buf);
> -                    io_mem_write[io_index][0](io_mem_opaque[io_index], addr, val);
> -                    l = 1;
> +                    io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
> +                    /* XXX: could force cpu_single_env to NULL to avoid
> +                       potential bugs */
> +                    if (l >= 4 && ((addr & 3) == 0)) {
> +                        /* 32 bit write access */
> +                        val = ldul_p(buf);
> +                        io_mem_write[io_index][2](io_mem_opaque[io_index],
> +                                                  addr, val);
> +                        l = 4;
> +                    } else if (l >= 2 && ((addr & 1) == 0)) {
> +                        /* 16 bit write access */
> +                        val = lduw_p(buf);
> +                        io_mem_write[io_index][1](io_mem_opaque[io_index],
> +                                                  addr, val);
> +                        l = 2;
> +                    } else {
> +                        /* 8 bit write access */
> +                        val = ldub_p(buf);
> +                        io_mem_write[io_index][0](io_mem_opaque[io_index],
> +                                                  addr, val);
> +                        l = 1;
> +                    }
>                  }
>              } else {
> -                unsigned long addr1;
>                  addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
>                  /* RAM case */
>                  ptr = phys_ram_base + addr1;
>                  memcpy(ptr, buf, l);
> +            invalidate_code:
>                  if (!cpu_physical_memory_is_dirty(addr1)) {
>                      /* invalidate code */
>                      tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
> @@ -2560,23 +2583,45 @@ void cpu_physical_memory_rw(target_phys_
>          } else {
>              if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
>                  !(pd & IO_MEM_ROMD)) {
> -                /* I/O case */
> -                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
> -                if (l >= 4 && ((addr & 3) == 0)) {
> -                    /* 32 bit read access */
> -                    val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
> -                    stl_p(buf, val);
> -                    l = 4;
> -                } else if (l >= 2 && ((addr & 1) == 0)) {
> -                    /* 16 bit read access */
> -                    val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
> -                    stw_p(buf, val);
> -                    l = 2;
> +                if (pd & IO_MEM_REVERSE) {
> +                    /* Specific case for reverse endian page read */
> +                    addr1 = (pd & TARGET_PAGE_MASK) +
> +                        (addr & ~TARGET_PAGE_MASK);
> +                    ptr = phys_ram_base + addr1;
> +                    for (; l >= 4; l -= 4) {
> +                        *(uint32_t *)buf = ldulr_p(ptr);
> +                        ptr += 4;
> +                        buf += 4;
> +                    }
> +                    for (; l >= 2; l -= 2) {
> +                        *(uint16_t *)buf = lduwr_p(ptr);
> +                        ptr += 2;
> +                        buf += 2;
> +                    }
> +                    if (l >= 1)
> +                        *buf = *ptr;
>                  } else {
> -                    /* 8 bit read access */
> -                    val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr);
> -                    stb_p(buf, val);
> -                    l = 1;
> +                    /* I/O case */
> +                    io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
> +                    if (l >= 4 && ((addr & 3) == 0)) {
> +                        /* 32 bit read access */
> +                        val = io_mem_read[io_index][2](io_mem_opaque[io_index],
> +                                                       addr);
> +                        stl_p(buf, val);
> +                        l = 4;
> +                    } else if (l >= 2 && ((addr & 1) == 0)) {
> +                        /* 16 bit read access */
> +                        val = io_mem_read[io_index][1](io_mem_opaque[io_index],
> +                                                       addr);
> +                        stw_p(buf, val);
> +                        l = 2;
> +                    } else {
> +                        /* 8 bit read access */
> +                        val = io_mem_read[io_index][0](io_mem_opaque[io_index],
> +                                                       addr);
> +                        stb_p(buf, val);
> +                        l = 1;
> +                    }
>                  }
>              } else {
>                  /* RAM case */
> @@ -2632,7 +2677,7 @@ void cpu_physical_memory_write_rom(targe
>  
>  
>  /* warning: addr must be aligned */
> -uint32_t ldl_phys(target_phys_addr_t addr)
> +uint32_t ldul_phys(target_phys_addr_t addr)
>  {
>      int io_index;
>      uint8_t *ptr;
> @@ -2656,13 +2701,13 @@ uint32_t ldl_phys(target_phys_addr_t add
>          /* RAM case */
>          ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) +
>              (addr & ~TARGET_PAGE_MASK);
> -        val = ldl_p(ptr);
> +        val = ldul_p(ptr);
>      }
>      return val;
>  }
>  
>  /* warning: addr must be aligned */
> -uint64_t ldq_phys(target_phys_addr_t addr)
> +uint64_t lduq_phys(target_phys_addr_t addr)
>  {
>      int io_index;
>      uint8_t *ptr;
> @@ -2692,7 +2737,7 @@ uint64_t ldq_phys(target_phys_addr_t add
>          /* RAM case */
>          ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) +
>              (addr & ~TARGET_PAGE_MASK);
> -        val = ldq_p(ptr);
> +        val = lduq_p(ptr);
>      }
>      return val;
>  }
> @@ -2907,6 +2952,7 @@ void dump_exec_info(FILE *f,
>  #define env cpu_single_env
>  #define SOFTMMU_CODE_ACCESS
>  
> +/* Native-endian */
>  #define SHIFT 0
>  #include "softmmu_template.h"
>  
> @@ -2919,6 +2965,21 @@ void dump_exec_info(FILE *f,
>  #define SHIFT 3
>  #include "softmmu_template.h"
>  
> +/* Reverse-endian */
> +#define REVERSE_ENDIAN
> +#define SHIFT 0
> +#include "softmmu_template.h"
> +
> +#define SHIFT 1
> +#include "softmmu_template.h"
> +
> +#define SHIFT 2
> +#include "softmmu_template.h"
> +
> +#define SHIFT 3
> +#include "softmmu_template.h"
> +#undef REVERSE_ENDIAN
> +
>  #undef env
>  
>  #endif
> Index: monitor.c
> ===================================================================
> RCS file: /sources/qemu/qemu/monitor.c,v
> retrieving revision 1.83
> diff -u -d -d -p -r1.83 monitor.c
> --- monitor.c	25 Sep 2007 17:28:42 -0000	1.83
> +++ monitor.c	16 Oct 2007 11:39:03 -0000
> @@ -595,10 +595,10 @@ static void memory_dump(int count, int f
>                  v = lduw_raw(buf + i);
>                  break;
>              case 4:
> -                v = (uint32_t)ldl_raw(buf + i);
> +                v = (uint32_t)ldul_raw(buf + i);
>                  break;
>              case 8:
> -                v = ldq_raw(buf + i);
> +                v = lduq_raw(buf + i);
>                  break;
>              }
>              term_printf(" ");
> Index: nommu_header.h
> ===================================================================
> RCS file: nommu_header.h
> diff -N nommu_header.h
> --- /dev/null	1 Jan 1970 00:00:00 -0000
> +++ nommu_header.h	16 Oct 2007 11:39:03 -0000
> @@ -0,0 +1,51 @@
> +/*
> + *  No MMU support definitions
> + *
> + *  Copyright (c) 2003-2007 Fabrice Bellard
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> + */
> +
> +/* Native-endian accesses */
> +#define ESUFFIX
> +#define LSUFFIX _raw
> +#define LADDR_TYPE (void *)
> +#include "nommu_template.h"
> +/* Reverse-endian accesses */
> +#define ESUFFIX r
> +#define LSUFFIX r_raw
> +#define LADDR_TYPE (void *)
> +#include "nommu_template.h"
> +/* big-endian accesses */
> +#define ESUFFIX _be
> +#define LADDR_TYPE (unsigned long)
> +#ifdef TARGET_WORDS_BIGENDIAN
> +#define LSUFFIX
> +#else
> +#define LSUFFIX r
> +#endif
> +#include "nommu_template.h"
> +/* little-endian accesses */
> +#define ESUFFIX _le
> +#define LADDR_TYPE (unsigned long)
> +#ifdef TARGET_WORDS_BIGENDIAN
> +#define LSUFFIX r
> +#else
> +#define LSUFFIX
> +#endif
> +#include "nommu_template.h"
> +
> +#undef ADDR_TYPE
> +#undef SUFFIX
> Index: nommu_template.h
> ===================================================================
> RCS file: nommu_template.h
> diff -N nommu_template.h
> --- /dev/null	1 Jan 1970 00:00:00 -0000
> +++ nommu_template.h	16 Oct 2007 11:39:03 -0000
> @@ -0,0 +1,109 @@
> +/*
> + *  No MMU support helpers
> + *
> + *  Copyright (c) 2003-2007 Fabrice Bellard
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> + */
> +
> +/* XXX: fix this */
> +#ifndef glue
> +#define xglue(x, y) x ## y
> +#define glue(x, y) xglue(x, y)
> +#endif
> +
> +static inline int glue(glue(ldub, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
> +{
> +    return glue(ldub, LSUFFIX)(LADDR_TYPE ptr);
> +}
> +
> +static inline int glue(glue(ldsb, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
> +{
> +    return glue(ldsb, LSUFFIX)(LADDR_TYPE ptr);
> +}
> +
> +static inline int glue(glue(lduw, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
> +{
> +    return glue(lduw, LSUFFIX)(LADDR_TYPE ptr);
> +}
> +
> +static inline int glue(glue(ldsw, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
> +{
> +    return glue(ldsw, LSUFFIX)(LADDR_TYPE ptr);
> +}
> +
> +static inline target_long glue(glue(ldul, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
> +{
> +    return glue(ldul, LSUFFIX)(LADDR_TYPE ptr);
> +}
> +
> +static inline target_long glue(glue(ldsl, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
> +{
> +    return glue(ldsl, LSUFFIX)(LADDR_TYPE ptr);
> +}
> +
> +static inline uint64_t glue(glue(lduq, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
> +{
> +    return glue(lduq, LSUFFIX)(LADDR_TYPE ptr);
> +}
> +
> +static inline int64_t glue(glue(ldsq, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
> +{
> +    return glue(ldsq, LSUFFIX)(LADDR_TYPE ptr);
> +}
> +
> +static inline float32 glue(glue(ldfl, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
> +{
> +    return glue(ldfl, LSUFFIX)(LADDR_TYPE ptr);
> +}
> +
> +static inline float64 glue(glue(ldfq, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
> +{
> +    return glue(ldfq, LSUFFIX)(LADDR_TYPE ptr);
> +}
> +
> +static inline void glue(glue(stb, ESUFFIX), SUFFIX) (ADDR_TYPE ptr, int v)
> +{
> +    glue(stb, LSUFFIX)(LADDR_TYPE ptr, v);
> +}
> +
> +static inline void glue(glue(stw, ESUFFIX), SUFFIX) (ADDR_TYPE ptr, int v)
> +{
> +    glue(stw, LSUFFIX)(LADDR_TYPE ptr, v);
> +}
> +
> +static inline void glue(glue(stl, ESUFFIX), SUFFIX) (ADDR_TYPE ptr, int v)
> +{
> +    glue(stl, LSUFFIX)(LADDR_TYPE ptr, v);
> +}
> +
> +static inline void glue(glue(stq, ESUFFIX), SUFFIX) (ADDR_TYPE ptr, uint64_t v)
> +{
> +    glue(stq, LSUFFIX)(LADDR_TYPE ptr, v);
> +}
> +
> +static inline void glue(glue(stfl, ESUFFIX), SUFFIX) (ADDR_TYPE ptr, float32 v)
> +{
> +    glue(stfl, LSUFFIX)(LADDR_TYPE ptr, v);
> +}
> +
> +static inline void glue(glue(stfq, ESUFFIX), SUFFIX) (ADDR_TYPE ptr, float64 v)
> +{
> +    glue(stfq, LSUFFIX)(LADDR_TYPE ptr, v);
> +}
> +
> +#undef LSUFFIX
> +#undef LADDR_TYPE
> +#undef ESUFFIX
> Index: softmmu_exec.h
> ===================================================================
> RCS file: /sources/qemu/qemu/softmmu_exec.h,v
> retrieving revision 1.2
> diff -u -d -d -p -r1.2 softmmu_exec.h
> --- softmmu_exec.h	14 Oct 2007 07:07:05 -0000	1.2
> +++ softmmu_exec.h	16 Oct 2007 11:39:03 -0000
> @@ -1,14 +1,5 @@
>  /* Common softmmu definitions and inline routines.  */
>  
> -/* XXX: find something cleaner.
> - * Furthermore, this is false for 64 bits targets
> - */
> -#define ldul_user       ldl_user
> -#define ldul_kernel     ldl_kernel
> -#define ldul_hypv       ldl_hypv
> -#define ldul_executive  ldl_executive
> -#define ldul_supervisor ldl_supervisor
> -
>  #define ACCESS_TYPE 0
>  #define MEMSUFFIX MMU_MODE0_SUFFIX
>  #define DATA_SIZE 1
> @@ -104,8 +95,10 @@
>  #define ldsb(p) ldsb_data(p)
>  #define lduw(p) lduw_data(p)
>  #define ldsw(p) ldsw_data(p)
> -#define ldl(p) ldl_data(p)
> -#define ldq(p) ldq_data(p)
> +#define ldul(p) ldul_data(p)
> +#define ldsl(p) ldsl_data(p)
> +#define lduq(p) lduq_data(p)
> +#define ldsq(p) ldsq_data(p)
>  
>  #define stb(p, v) stb_data(p, v)
>  #define stw(p, v) stw_data(p, v)
> Index: softmmu_header.h
> ===================================================================
> RCS file: /sources/qemu/qemu/softmmu_header.h,v
> retrieving revision 1.18
> diff -u -d -d -p -r1.18 softmmu_header.h
> --- softmmu_header.h	14 Oct 2007 07:07:05 -0000	1.18
> +++ softmmu_header.h	16 Oct 2007 11:39:03 -0000
> @@ -17,27 +17,84 @@
>   * License along with this library; if not, write to the Free Software
>   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
>   */
> +
> +#if !defined(REVERSE_ENDIAN)
> +/* native-endian */
> +#if defined(TARGET_WORDS_BIGENDIAN)
> +#define ESUFFIX _be
> +#else
> +#define ESUFFIX _le
> +#endif
>  #if DATA_SIZE == 8
>  #define SUFFIX q
> -#define USUFFIX q
> +#define USUFFIX uq
> +#define LSUFFIX q
> +#define LUSUFFIX uq
> +#define DATA_STYPE int64_t
>  #define DATA_TYPE uint64_t
>  #elif DATA_SIZE == 4
>  #define SUFFIX l
> -#define USUFFIX l
> +#define USUFFIX ul
> +#define LSUFFIX l
> +#define LUSUFFIX ul
> +#define DATA_STYPE int32_t
>  #define DATA_TYPE uint32_t
>  #elif DATA_SIZE == 2
>  #define SUFFIX w
>  #define USUFFIX uw
> +#define LSUFFIX w
> +#define LUSUFFIX uw
>  #define DATA_TYPE uint16_t
>  #define DATA_STYPE int16_t
>  #elif DATA_SIZE == 1
>  #define SUFFIX b
>  #define USUFFIX ub
> +#define LSUFFIX b
> +#define LUSUFFIX ub
> +#define DATA_TYPE uint8_t
> +#define DATA_STYPE int8_t
> +#else
> +#error unsupported data size
> +#endif
> +#else /* !defined(REVERSE_ENDIAN) */
> +/* reverse-endian */
> +#if defined(TARGET_WORDS_BIGENDIAN)
> +#define ESUFFIX _le
> +#else
> +#define ESUFFIX _be
> +#endif
> +#if DATA_SIZE == 8
> +#define SUFFIX qr
> +#define USUFFIX uqr
> +#define LSUFFIX q
> +#define LUSUFFIX uq
> +#define DATA_STYPE int64_t
> +#define DATA_TYPE uint64_t
> +#elif DATA_SIZE == 4
> +#define SUFFIX lr
> +#define USUFFIX ulr
> +#define LSUFFIX l
> +#define LUSUFFIX ul
> +#define DATA_STYPE int32_t
> +#define DATA_TYPE uint32_t
> +#elif DATA_SIZE == 2
> +#define SUFFIX wr
> +#define USUFFIX uwr
> +#define LSUFFIX w
> +#define LUSUFFIX uw
> +#define DATA_TYPE uint16_t
> +#define DATA_STYPE int16_t
> +#elif DATA_SIZE == 1
> +#define SUFFIX br
> +#define USUFFIX ubr
> +#define LSUFFIX b
> +#define LUSUFFIX ub
>  #define DATA_TYPE uint8_t
>  #define DATA_STYPE int8_t
>  #else
>  #error unsupported data size
>  #endif
> +#endif /* defined(REVERSE_ENDIAN) */
>  
>  #if ACCESS_TYPE < (NB_MMU_MODES)
>  
> @@ -121,7 +178,6 @@ static inline RES_TYPE glue(glue(ld, USU
>      return res;
>  }
>  
> -#if DATA_SIZE <= 2
>  static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
>  {
>      int res;
> @@ -167,7 +223,6 @@ static inline int glue(glue(lds, SUFFIX)
>                    : "%eax", "%ecx", "%edx", "memory", "cc");
>      return res;
>  }
> -#endif
>  
>  static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v)
>  {
> @@ -244,7 +299,6 @@ static inline RES_TYPE glue(glue(ld, USU
>      return res;
>  }
>  
> -#if DATA_SIZE <= 2
>  static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
>  {
>      int res, index;
> @@ -264,7 +318,6 @@ static inline int glue(glue(lds, SUFFIX)
>      }
>      return res;
>  }
> -#endif
>  
>  #if ACCESS_TYPE != (NB_MMU_MODES + 1)
>  
> @@ -293,8 +346,27 @@ static inline void glue(glue(st, SUFFIX)
>  
>  #endif /* !asm */
>  
> +/* BE/LE access routines */
> +static inline RES_TYPE glue(glue(glue(ld, LUSUFFIX), ESUFFIX), MEMSUFFIX)(target_ulong ptr)
> +{
> +    return glue(glue(ld, USUFFIX), MEMSUFFIX)(ptr);
> +}
> +
> +static inline RES_TYPE glue(glue(glue(lds, LSUFFIX), ESUFFIX), MEMSUFFIX)(target_ulong ptr)
> +{
> +    return glue(glue(lds, SUFFIX), MEMSUFFIX)(ptr);
> +}
> +
>  #if ACCESS_TYPE != (NB_MMU_MODES + 1)
> +static inline void glue(glue(glue(st, LSUFFIX), ESUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v)
> +{
> +    glue(glue(st, SUFFIX), MEMSUFFIX)(ptr, v);
> +}
> +#endif
>  
> +#if ACCESS_TYPE != (NB_MMU_MODES + 1)
> +
> +#if !defined(REVERSE_ENDIAN)
>  #if DATA_SIZE == 8
>  static inline float64 glue(ldfq, MEMSUFFIX)(target_ulong ptr)
>  {
> @@ -302,10 +374,15 @@ static inline float64 glue(ldfq, MEMSUFF
>          float64 d;
>          uint64_t i;
>      } u;
> -    u.i = glue(ldq, MEMSUFFIX)(ptr);
> +    u.i = glue(lduq, MEMSUFFIX)(ptr);
>      return u.d;
>  }
>  
> +static inline float64 glue(glue(ldfq, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
> +{
> +    return glue(ldfq, MEMSUFFIX)(ptr);
> +}
> +
>  static inline void glue(stfq, MEMSUFFIX)(target_ulong ptr, float64 v)
>  {
>      union {
> @@ -315,6 +392,12 @@ static inline void glue(stfq, MEMSUFFIX)
>      u.d = v;
>      glue(stq, MEMSUFFIX)(ptr, u.i);
>  }
> +
> +static inline void glue(glue(stfq, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
> +                                                        float64 v)
> +{
> +    glue(stfq, MEMSUFFIX)(ptr, v);
> +}
>  #endif /* DATA_SIZE == 8 */
>  
>  #if DATA_SIZE == 4
> @@ -324,10 +407,15 @@ static inline float32 glue(ldfl, MEMSUFF
>          float32 f;
>          uint32_t i;
>      } u;
> -    u.i = glue(ldl, MEMSUFFIX)(ptr);
> +    u.i = glue(ldul, MEMSUFFIX)(ptr);
>      return u.f;
>  }
>  
> +static inline float32 glue(glue(ldfl, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
> +{
> +    return glue(ldfl, MEMSUFFIX)(ptr);
> +}
> +
>  static inline void glue(stfl, MEMSUFFIX)(target_ulong ptr, float32 v)
>  {
>      union {
> @@ -337,8 +425,84 @@ static inline void glue(stfl, MEMSUFFIX)
>      u.f = v;
>      glue(stl, MEMSUFFIX)(ptr, u.i);
>  }
> +
> +static inline void glue(glue(stfl, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
> +                                                        float32 v)
> +{
> +    glue(stfl, MEMSUFFIX)(ptr, v);
> +}
>  #endif /* DATA_SIZE == 4 */
>  
> +#else /* defined(REVERSE_ENDIAN) */
> +
> +#if DATA_SIZE == 8
> +static inline float64 glue(ldfqr, MEMSUFFIX)(target_ulong ptr)
> +{
> +    union {
> +        float64 d;
> +        uint64_t i;
> +    } u;
> +    u.i = glue(lduqr, MEMSUFFIX)(ptr);
> +    return u.d;
> +}
> +
> +static inline float64 glue(glue(ldfqr, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
> +{
> +    return glue(ldfqr, MEMSUFFIX)(ptr);
> +}
> +
> +static inline void glue(stfqr, MEMSUFFIX)(target_ulong ptr, float64 v)
> +{
> +    union {
> +        float64 d;
> +        uint64_t i;
> +    } u;
> +    u.d = v;
> +    glue(stqr, MEMSUFFIX)(ptr, u.i);
> +}
> +
> +static inline void glue(glue(stfqr, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
> +                                                         float64 v)
> +{
> +    glue(stfqr, MEMSUFFIX)(ptr, v);
> +}
> +#endif /* DATA_SIZE == 8 */
> +
> +#if DATA_SIZE == 4
> +static inline float32 glue(ldflr, MEMSUFFIX)(target_ulong ptr)
> +{
> +    union {
> +        float32 f;
> +        uint32_t i;
> +    } u;
> +    u.i = glue(ldulr, MEMSUFFIX)(ptr);
> +    return u.f;
> +}
> +
> +static inline float32 glue(glue(ldflr, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
> +{
> +    return glue(ldflr, MEMSUFFIX)(ptr);
> +}
> +
> +static inline void glue(stflr, MEMSUFFIX)(target_ulong ptr, float32 v)
> +{
> +    union {
> +        float32 f;
> +        uint32_t i;
> +    } u;
> +    u.f = v;
> +    glue(stlr, MEMSUFFIX)(ptr, u.i);
> +}
> +
> +static inline void glue(glue(stflr, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
> +                                                         float32 v)
> +{
> +    glue(stflr, MEMSUFFIX)(ptr, v);
> +}
> +#endif /* DATA_SIZE == 4 */
> +
> +#endif /* defined(REVERSE_ENDIAN) */
> +
>  #endif /* ACCESS_TYPE != (NB_MMU_MODES + 1) */
>  
>  #undef RES_TYPE
> @@ -346,7 +510,10 @@ static inline void glue(stfl, MEMSUFFIX)
>  #undef DATA_STYPE
>  #undef SUFFIX
>  #undef USUFFIX
> +#undef LSUFFIX
> +#undef LUSUFFIX
>  #undef DATA_SIZE
>  #undef CPU_MMU_INDEX
>  #undef MMUSUFFIX
> +#undef ESUFFIX
>  #undef ADDR_READ
> Index: softmmu_template.h
> ===================================================================
> RCS file: /sources/qemu/qemu/softmmu_template.h,v
> retrieving revision 1.19
> diff -u -d -d -p -r1.19 softmmu_template.h
> --- softmmu_template.h	14 Oct 2007 07:07:05 -0000	1.19
> +++ softmmu_template.h	16 Oct 2007 11:39:03 -0000
> @@ -19,25 +19,66 @@
>   */
>  #define DATA_SIZE (1 << SHIFT)
>  
> +#if !defined(REVERSE_ENDIAN)
> +/* native-endian */
>  #if DATA_SIZE == 8
>  #define SUFFIX q
> -#define USUFFIX q
> +#define USUFFIX uq
> +#define RSUFFIX qr
> +#define URSUFFIX uqr
>  #define DATA_TYPE uint64_t
>  #elif DATA_SIZE == 4
>  #define SUFFIX l
> -#define USUFFIX l
> +#define USUFFIX ul
> +#define RSUFFIX lr
> +#define URSUFFIX ulr
>  #define DATA_TYPE uint32_t
>  #elif DATA_SIZE == 2
>  #define SUFFIX w
>  #define USUFFIX uw
> +#define RSUFFIX wr
> +#define URSUFFIX uwr
>  #define DATA_TYPE uint16_t
>  #elif DATA_SIZE == 1
>  #define SUFFIX b
>  #define USUFFIX ub
> +#define RSUFFIX br
> +#define URSUFFIX ubr
>  #define DATA_TYPE uint8_t
>  #else
>  #error unsupported data size
>  #endif
> +#else /* !defined(REVERSE_ENDIAN) */
> +/* reverse-endian */
> +#if DATA_SIZE == 8
> +#define SUFFIX qr
> +#define USUFFIX uqr
> +#define RSUFFIX q
> +#define URSUFFIX uq
> +#define DATA_TYPE uint64_t
> +#elif DATA_SIZE == 4
> +#define SUFFIX lr
> +#define USUFFIX ulr
> +#define RSUFFIX l
> +#define URSUFFIX ul
> +#define DATA_TYPE uint32_t
> +#elif DATA_SIZE == 2
> +#define SUFFIX wr
> +#define USUFFIX uwr
> +#define RSUFFIX w
> +#define URSUFFIX uw
> +#define DATA_TYPE uint16_t
> +#elif DATA_SIZE == 1
> +#define SUFFIX br
> +#define USUFFIX ubr
> +#define RSUFFIX b
> +#define URSUFFIX ub
> +#define DATA_TYPE uint8_t
> +#else
> +#error unsupported data size
> +#endif
> +#endif /* defined(REVERSE_ENDIAN) */
> +
>  
>  #ifdef SOFTMMU_CODE_ACCESS
>  #define READ_ACCESS_TYPE 2
> @@ -47,25 +88,62 @@
>  #define ADDR_READ addr_read
>  #endif
>  
> +#if (defined(TARGET_WORDS_BIGENDIAN) && !defined(REVERSE_ENDIAN)) || \
> +    (!defined(TARGET_WORDS_BIGENDIAN) && defined(REVERSE_ENDIAN))
> +#define ACCESS_WORDS_BIGENDIAN
> +#endif
> +
> +/* Beware: we do not have reverse-endian accessors for IOs */
> +#if defined(REVERSE_ENDIAN)
> +#define DO_IOSWAP 1
> +#else
> +#define DO_IOSWAP 0
> +#endif
> +#if SHIFT == 1
> +#define IOSWAP(val) bswap16(val)
> +#elif SHIFT >= 2
> +#define IOSWAP(val) bswap32(val)
> +#else
> +#define IOSWAP(val) (val)
> +#endif
> +
>  static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
>                                                          int mmu_idx,
>                                                          void *retaddr);
>  static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
> -                                              target_ulong tlb_addr)
> +                                              target_ulong tlb_addr,
> +                                              int do_ioswap)
>  {
>      DATA_TYPE res;
> +#if SHIFT > 2
> +    uint32_t tmp;
> +#endif
>      int index;
>  
>      index = (tlb_addr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
>  #if SHIFT <= 2
>      res = io_mem_read[index][SHIFT](io_mem_opaque[index], physaddr);
> +    if (do_ioswap != DO_IOSWAP)
> +        res = IOSWAP(res);
>  #else
> -#ifdef TARGET_WORDS_BIGENDIAN
> -    res = (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr) << 32;
> -    res |= io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
> +#ifdef ACCESS_WORDS_BIGENDIAN
> +    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr);
> +    if (do_ioswap != DO_IOSWAP)
> +        tmp = IOSWAP(tmp);
> +    res = (uint64_t)tmp << 32;
> +    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
> +    if (do_ioswap != DO_IOSWAP)
> +        tmp = IOSWAP(tmp);
> +    res |= tmp;
>  #else
> -    res = io_mem_read[index][2](io_mem_opaque[index], physaddr);
> -    res |= (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr + 4) << 32;
> +    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr);
> +    if (do_ioswap != DO_IOSWAP)
> +        tmp = IOSWAP(tmp);
> +    res = tmp;
> +    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
> +    if (do_ioswap != DO_IOSWAP)
> +        tmp = IOSWAP(tmp);
> +    res |= (uint64_t)tmp << 32;
>  #endif
>  #endif /* SHIFT > 2 */
>  #ifdef USE_KQEMU
> @@ -92,10 +170,34 @@ DATA_TYPE REGPARM(1) glue(glue(__ld, SUF
>      if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
>          physaddr = addr + env->tlb_table[mmu_idx][index].addend;
>          if (tlb_addr & ~TARGET_PAGE_MASK) {
> -            /* IO access */
> -            if ((addr & (DATA_SIZE - 1)) != 0)
> -                goto do_unaligned_access;
> -            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
> +            if (tlb_addr & IO_MEM_REVERSE) {
> +                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
> +                    /* Specific case for reverse endian IO read */
> +                    if ((addr & (DATA_SIZE - 1)) != 0)
> +                        goto do_unaligned_access;
> +                    res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 1);
> +                } else {
> +                    /* Specific case for reverse endian page read */
> +                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
> +                        TARGET_PAGE_SIZE) {
> +                        /* slow unaligned access (it spans two pages or IO) */
> +                        goto do_unaligned_access;
> +                    }
> +#ifdef ALIGNED_ONLY
> +                    if ((addr & (DATA_SIZE - 1)) != 0) {
> +                        retaddr = GETPC();
> +                        do_unaligned_access(addr, READ_ACCESS_TYPE,
> +                                            mmu_idx, retaddr);
> +                    }
> +#endif
> +                    res = glue(glue(ld, URSUFFIX), _raw)((uint8_t *)(long)physaddr);
> +                }
> +            } else {
> +                /* IO access */
> +                if ((addr & (DATA_SIZE - 1)) != 0)
> +                    goto do_unaligned_access;
> +                res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 0);
> +            }
>          } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
>              /* slow unaligned access (it spans two pages or IO) */
>          do_unaligned_access:
> @@ -144,10 +246,45 @@ static DATA_TYPE glue(glue(slow_ld, SUFF
>      if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
>          physaddr = addr + env->tlb_table[mmu_idx][index].addend;
>          if (tlb_addr & ~TARGET_PAGE_MASK) {
> -            /* IO access */
> -            if ((addr & (DATA_SIZE - 1)) != 0)
> -                goto do_unaligned_access;
> -            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
> +            if (tlb_addr & IO_MEM_REVERSE) {
> +                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
> +                    /* Specific case for reverse endian IO read */
> +                    if ((addr & (DATA_SIZE - 1)) != 0)
> +                        goto do_unaligned_access;
> +                    res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 1);
> +                } else {
> +                    /* Specific case for reverse endian page read */
> +                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
> +                        TARGET_PAGE_SIZE) {
> +                        /* slow unaligned access (it spans two pages) */
> +                        addr1 = addr & ~(DATA_SIZE - 1);
> +                        addr2 = addr1 + DATA_SIZE;
> +                        res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr1,
> +                                                                      mmu_idx,
> +                                                                      retaddr);
> +                        res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
> +                                                                      mmu_idx,
> +                                                                      retaddr);
> +                        shift = (addr & (DATA_SIZE - 1)) * 8;
> +#ifdef ACCESS_WORDS_BIGENDIAN
> +                        res = (res1 >> shift) |
> +                            (res2 << ((DATA_SIZE * 8) - shift));
> +#else
> +                        res = (res1 << shift) |
> +                            (res2 >> ((DATA_SIZE * 8) - shift));
> +#endif
> +                        res = (DATA_TYPE)res;
> +                    } else {
> +                        /* unaligned/aligned access in the same page */
> +                        res = glue(glue(ld, URSUFFIX), _raw)((uint8_t *)(long)physaddr);
> +                    }
> +                }
> +            } else {
> +                /* IO access */
> +                if ((addr & (DATA_SIZE - 1)) != 0)
> +                    goto do_unaligned_access;
> +                res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 0);
> +            }
>          } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
>          do_unaligned_access:
>              /* slow unaligned access (it spans two pages) */
> @@ -158,7 +295,7 @@ static DATA_TYPE glue(glue(slow_ld, SUFF
>              res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
>                                                            mmu_idx, retaddr);
>              shift = (addr & (DATA_SIZE - 1)) * 8;
> -#ifdef TARGET_WORDS_BIGENDIAN
> +#ifdef ACCESS_WORDS_BIGENDIAN
>              res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
>  #else
>              res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift));
> @@ -186,22 +323,39 @@ static void glue(glue(slow_st, SUFFIX), 
>  static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr,
>                                            DATA_TYPE val,
>                                            target_ulong tlb_addr,
> -                                          void *retaddr)
> +                                          void *retaddr, int do_ioswap)
>  {
> +#if SHIFT > 2
> +    uint32_t tmp;
> +#endif
>      int index;
>  
>      index = (tlb_addr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
>      env->mem_write_vaddr = tlb_addr;
>      env->mem_write_pc = (unsigned long)retaddr;
>  #if SHIFT <= 2
> +    if (do_ioswap != DO_IOSWAP)
> +        val = IOSWAP(val);
>      io_mem_write[index][SHIFT](io_mem_opaque[index], physaddr, val);
>  #else
> -#ifdef TARGET_WORDS_BIGENDIAN
> -    io_mem_write[index][2](io_mem_opaque[index], physaddr, val >> 32);
> -    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val);
> +#ifdef ACCESS_WORDS_BIGENDIAN
> +    tmp = val >> 32;
> +    if (do_ioswap != DO_IOSWAP)
> +        tmp = IOSWAP(tmp);
> +    io_mem_write[index][2](io_mem_opaque[index], physaddr, tmp);
> +    tmp = val;
> +    if (do_ioswap != DO_IOSWAP)
> +        tmp = IOSWAP(tmp);
> +    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, tmp);
>  #else
> -    io_mem_write[index][2](io_mem_opaque[index], physaddr, val);
> -    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val >> 32);
> +    tmp = val;
> +    if (do_ioswap != DO_IOSWAP)
> +        tmp = IOSWAP(tmp);
> +    io_mem_write[index][2](io_mem_opaque[index], physaddr, tmp);
> +    tmp = val >> 32;
> +    if (do_ioswap != DO_IOSWAP)
> +        tmp = IOSWAP(tmp);
> +    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, tmp);
>  #endif
>  #endif /* SHIFT > 2 */
>  #ifdef USE_KQEMU
> @@ -224,12 +378,37 @@ void REGPARM(2) glue(glue(__st, SUFFIX),
>      if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
>          physaddr = addr + env->tlb_table[mmu_idx][index].addend;
>          if (tlb_addr & ~TARGET_PAGE_MASK) {
> -            /* IO access */
> -            if ((addr & (DATA_SIZE - 1)) != 0)
> -                goto do_unaligned_access;
> -            retaddr = GETPC();
> -            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
> +            if (tlb_addr & IO_MEM_REVERSE) {
> +                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
> +                    /* Specific case for reverse endian IO write */
> +                    if ((addr & (DATA_SIZE - 1)) != 0)
> +                        goto do_unaligned_access;
> +                    retaddr = GETPC();
> +                    glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr,
> +                                           1);
> +                } else {
> +                    /* Specific case for reverse endian page write */
> +                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
> +                        TARGET_PAGE_SIZE) {
> +                        goto do_unaligned_access;
> +                    }
> +#ifdef ALIGNED_ONLY
> +                    if ((addr & (DATA_SIZE - 1)) != 0) {
> +                        retaddr = GETPC();
> +                        do_unaligned_access(addr, 1, mmu_idx, retaddr);
> +                    }
> +#endif
> +                    glue(glue(st, RSUFFIX), _raw)((uint8_t *)(long)physaddr, val);
> +                }
> +            } else {
> +                /* IO access */
> +                if ((addr & (DATA_SIZE - 1)) != 0)
> +                    goto do_unaligned_access;
> +                retaddr = GETPC();
> +                glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr, 0);
> +            }
>          } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
> +            /* slow unaligned access (it spans two pages or IO) */
>          do_unaligned_access:
>              retaddr = GETPC();
>  #ifdef ALIGNED_ONLY
> @@ -275,15 +454,48 @@ static void glue(glue(slow_st, SUFFIX), 
>      if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
>          physaddr = addr + env->tlb_table[mmu_idx][index].addend;
>          if (tlb_addr & ~TARGET_PAGE_MASK) {
> -            /* IO access */
> -            if ((addr & (DATA_SIZE - 1)) != 0)
> -                goto do_unaligned_access;
> -            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
> +            if (tlb_addr & IO_MEM_REVERSE) {
> +                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
> +                    /* Specific case for reverse endian IO write */
> +                    if ((addr & (DATA_SIZE - 1)) != 0)
> +                        goto do_unaligned_access;
> +                    glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr,
> +                                           1);
> +                } else {
> +                    /* Specific case for reverse endian page write */
> +                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
> +                        TARGET_PAGE_SIZE) {
> +                        /* slow unaligned access (it spans two pages or IO) */
> +                        /* XXX: not efficient, but simple */
> +                        for(i = 0;i < DATA_SIZE; i++) {
> +#ifdef ACCESS_WORDS_BIGENDIAN
> +                            glue(slow_stb, MMUSUFFIX)(addr + i, val >> (i * 8),
> +                                                      mmu_idx, retaddr);
> +#else
> +                            glue(slow_stb, MMUSUFFIX)(addr + i,
> +                                                      val >> (((DATA_SIZE - 1) * 8)
> +                                                              - (i * 8)),
> +                                                      mmu_idx, retaddr);
> +#endif
> +                        }
> +
> +                    } else {
> +                        /* aligned/unaligned access in the same page */
> +                        glue(glue(st, RSUFFIX), _raw)((uint8_t *)(long)physaddr,
> +                                                      val);
> +                    }
> +                }
> +            } else {
> +                /* IO access */
> +                if ((addr & (DATA_SIZE - 1)) != 0)
> +                    goto do_unaligned_access;
> +                glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr, 0);
> +            }
>          } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
>          do_unaligned_access:
>              /* XXX: not efficient, but simple */
>              for(i = 0;i < DATA_SIZE; i++) {
> -#ifdef TARGET_WORDS_BIGENDIAN
> +#ifdef ACCESS_WORDS_BIGENDIAN
>                  glue(slow_stb, MMUSUFFIX)(addr + i, val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
>                                            mmu_idx, retaddr);
>  #else
> @@ -304,10 +516,15 @@ static void glue(glue(slow_st, SUFFIX), 
>  
>  #endif /* !defined(SOFTMMU_CODE_ACCESS) */
>  
> +#undef DO_IOSWAP
> +#undef IOSWAP
> +#undef ACCESS_WORDS_BIGENDIAN
>  #undef READ_ACCESS_TYPE
>  #undef SHIFT
>  #undef DATA_TYPE
>  #undef SUFFIX
>  #undef USUFFIX
> +#undef RSUFFIX
> +#undef URSUFFIX
>  #undef DATA_SIZE
>  #undef ADDR_READ
> Index: hw/eepro100.c
> ===================================================================
> RCS file: /sources/qemu/qemu/hw/eepro100.c,v
> retrieving revision 1.6
> diff -u -d -d -p -r1.6 eepro100.c
> --- hw/eepro100.c	16 Sep 2007 21:07:52 -0000	1.6
> +++ hw/eepro100.c	16 Oct 2007 11:39:04 -0000
> @@ -723,7 +723,7 @@ static void eepro100_cu_command(EEPRO100
>              uint32_t tbd_address = cb_address + 0x10;
>              assert(tcb_bytes <= sizeof(buf));
>              while (size < tcb_bytes) {
> -                uint32_t tx_buffer_address = ldl_phys(tbd_address);
> +                uint32_t tx_buffer_address = ldul_phys(tbd_address);
>                  uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
>                  //~ uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
>                  tbd_address += 8;
> @@ -743,7 +743,7 @@ static void eepro100_cu_command(EEPRO100
>                      /* Extended TCB. */
>                      assert(tcb_bytes == 0);
>                      for (; tbd_count < 2; tbd_count++) {
> -                        uint32_t tx_buffer_address = ldl_phys(tbd_address);
> +                        uint32_t tx_buffer_address = ldul_phys(tbd_address);
>                          uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
>                          uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
>                          tbd_address += 8;
> @@ -760,7 +760,7 @@ static void eepro100_cu_command(EEPRO100
>                  }
>                  tbd_address = tbd_array;
>                  for (; tbd_count < tx.tbd_count; tbd_count++) {
> -                    uint32_t tx_buffer_address = ldl_phys(tbd_address);
> +                    uint32_t tx_buffer_address = ldul_phys(tbd_address);
>                      uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
>                      uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
>                      tbd_address += 8;
> Index: hw/pc.c
> ===================================================================
> RCS file: /sources/qemu/qemu/hw/pc.c,v
> retrieving revision 1.87
> diff -u -d -d -p -r1.87 pc.c
> --- hw/pc.c	9 Oct 2007 03:08:56 -0000	1.87
> +++ hw/pc.c	16 Oct 2007 11:39:04 -0000
> @@ -477,8 +477,8 @@ static void load_linux(const char *kerne
>      }
>  
>      /* kernel protocol version */
> -    fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202));
> -    if (ldl_p(header+0x202) == 0x53726448)
> +    fprintf(stderr, "header magic: %#x\n", ldul_p(header+0x202));
> +    if (ldul_p(header+0x202) == 0x53726448)
>  	protocol = lduw_p(header+0x206);
>      else
>  	protocol = 0;
> @@ -510,7 +510,7 @@ static void load_linux(const char *kerne
>  
>      /* highest address for loading the initrd */
>      if (protocol >= 0x203)
> -	initrd_max = ldl_p(header+0x22c);
> +	initrd_max = ldul_p(header+0x22c);
>      else
>  	initrd_max = 0x37ffffff;
>  
> Index: hw/pl080.c
> ===================================================================
> RCS file: /sources/qemu/qemu/hw/pl080.c,v
> retrieving revision 1.5
> diff -u -d -d -p -r1.5 pl080.c
> --- hw/pl080.c	16 Sep 2007 21:07:55 -0000	1.5
> +++ hw/pl080.c	16 Oct 2007 11:39:05 -0000
> @@ -162,10 +162,10 @@ again:
>              if (size == 0) {
>                  /* Transfer complete.  */
>                  if (ch->lli) {
> -                    ch->src = ldl_phys(ch->lli);
> -                    ch->dest = ldl_phys(ch->lli + 4);
> -                    ch->ctrl = ldl_phys(ch->lli + 12);
> -                    ch->lli = ldl_phys(ch->lli + 8);
> +                    ch->src = ldul_phys(ch->lli);
> +                    ch->dest = ldul_phys(ch->lli + 4);
> +                    ch->ctrl = ldul_phys(ch->lli + 12);
> +                    ch->lli = ldul_phys(ch->lli + 8);
>                  } else {
>                      ch->conf &= ~PL080_CCONF_E;
>                  }
> Index: hw/sun4m.c
> ===================================================================
> RCS file: /sources/qemu/qemu/hw/sun4m.c,v
> retrieving revision 1.55
> diff -u -d -d -p -r1.55 sun4m.c
> --- hw/sun4m.c	6 Oct 2007 11:28:21 -0000	1.55
> +++ hw/sun4m.c	16 Oct 2007 11:39:05 -0000
> @@ -465,7 +465,7 @@ static void sun4m_load_kernel(long vram_
>          }
>          if (initrd_size > 0) {
>              for (i = 0; i < 64 * TARGET_PAGE_SIZE; i += TARGET_PAGE_SIZE) {
> -                if (ldl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
> +                if (ldul_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
>                      == 0x48647253) { // HdrS
>                      stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 16, INITRD_LOAD_ADDR);
>                      stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 20, initrd_size);
> Index: hw/sun4u.c
> ===================================================================
> RCS file: /sources/qemu/qemu/hw/sun4u.c,v
> retrieving revision 1.22
> diff -u -d -d -p -r1.22 sun4u.c
> --- hw/sun4u.c	6 Oct 2007 11:28:21 -0000	1.22
> +++ hw/sun4u.c	16 Oct 2007 11:39:05 -0000
> @@ -418,7 +418,7 @@ static void sun4u_init(int ram_size, int
>          }
>          if (initrd_size > 0) {
>              for (i = 0; i < 64 * TARGET_PAGE_SIZE; i += TARGET_PAGE_SIZE) {
> -                if (ldl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
> +                if (ldul_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
>                      == 0x48647253) { // HdrS
>                      stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 16, INITRD_LOAD_ADDR);
>                      stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 20, initrd_size);
> Index: linux-user/elfload.c
> ===================================================================
> RCS file: /sources/qemu/qemu/linux-user/elfload.c,v
> retrieving revision 1.51
> diff -u -d -d -p -r1.51 elfload.c
> --- linux-user/elfload.c	9 Oct 2007 16:34:29 -0000	1.51
> +++ linux-user/elfload.c	16 Oct 2007 11:39:05 -0000
> @@ -322,8 +322,8 @@ static inline void init_thread(struct ta
>      _regs->msr = 1 << MSR_PR; /* Set user mode */
>      _regs->gpr[1] = infop->start_stack;
>  #ifdef TARGET_PPC64
> -    entry = ldq_raw(infop->entry) + infop->load_addr;
> -    toc = ldq_raw(infop->entry + 8) + infop->load_addr;
> +    entry = lduq_raw(infop->entry) + infop->load_addr;
> +    toc = lduq_raw(infop->entry + 8) + infop->load_addr;
>      _regs->gpr[2] = toc;
>      infop->entry = entry;
>  #endif
> @@ -336,7 +336,7 @@ static inline void init_thread(struct ta
>      pos += sizeof(target_ulong);
>      _regs->gpr[4] = pos;
>      for (tmp = 1; tmp != 0; pos += sizeof(target_ulong))
> -        tmp = ldl(pos);
> +        tmp = ldul(pos);
>      _regs->gpr[5] = pos;
>  }
>  
> Index: linux-user/qemu.h
> ===================================================================
> RCS file: /sources/qemu/qemu/linux-user/qemu.h,v
> retrieving revision 1.40
> diff -u -d -d -p -r1.40 qemu.h
> --- linux-user/qemu.h	9 Oct 2007 16:34:29 -0000	1.40
> +++ linux-user/qemu.h	16 Oct 2007 11:39:05 -0000
> @@ -313,15 +313,15 @@ static inline void *lock_user_string(tar
>  #define tput8(addr, val) stb(addr, val)
>  #define tget16(addr) lduw(addr)
>  #define tput16(addr, val) stw(addr, val)
> -#define tget32(addr) ldl(addr)
> +#define tget32(addr) ldul(addr)
>  #define tput32(addr, val) stl(addr, val)
> -#define tget64(addr) ldq(addr)
> +#define tget64(addr) lduq(addr)
>  #define tput64(addr, val) stq(addr, val)
>  #if TARGET_LONG_BITS == 64
> -#define tgetl(addr) ldq(addr)
> +#define tgetl(addr) lduq(addr)
>  #define tputl(addr, val) stq(addr, val)
>  #else
> -#define tgetl(addr) ldl(addr)
> +#define tgetl(addr) ldul(addr)
>  #define tputl(addr, val) stl(addr, val)
>  #endif
>  
> Index: linux-user/signal.c
> ===================================================================
> RCS file: /sources/qemu/qemu/linux-user/signal.c,v
> retrieving revision 1.45
> diff -u -d -d -p -r1.45 signal.c
> --- linux-user/signal.c	5 Oct 2007 17:01:51 -0000	1.45
> +++ linux-user/signal.c	16 Oct 2007 11:39:05 -0000
> @@ -878,28 +878,28 @@ restore_sigcontext(CPUX86State *env, str
>          cpu_x86_load_seg(env, R_ES, lduw(&sc->es));
>          cpu_x86_load_seg(env, R_DS, lduw(&sc->ds));
>  
> -        env->regs[R_EDI] = ldl(&sc->edi);
> -        env->regs[R_ESI] = ldl(&sc->esi);
> -        env->regs[R_EBP] = ldl(&sc->ebp);
> -        env->regs[R_ESP] = ldl(&sc->esp);
> -        env->regs[R_EBX] = ldl(&sc->ebx);
> -        env->regs[R_EDX] = ldl(&sc->edx);
> -        env->regs[R_ECX] = ldl(&sc->ecx);
> -        env->eip = ldl(&sc->eip);
> +        env->regs[R_EDI] = ldul(&sc->edi);
> +        env->regs[R_ESI] = ldul(&sc->esi);
> +        env->regs[R_EBP] = ldul(&sc->ebp);
> +        env->regs[R_ESP] = ldul(&sc->esp);
> +        env->regs[R_EBX] = ldul(&sc->ebx);
> +        env->regs[R_EDX] = ldul(&sc->edx);
> +        env->regs[R_ECX] = ldul(&sc->ecx);
> +        env->eip = ldul(&sc->eip);
>  
>          cpu_x86_load_seg(env, R_CS, lduw(&sc->cs) | 3);
>          cpu_x86_load_seg(env, R_SS, lduw(&sc->ss) | 3);
>  
>  	{
>  		unsigned int tmpflags;
> -                tmpflags = ldl(&sc->eflags);
> +                tmpflags = ldul(&sc->eflags);
>  		env->eflags = (env->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
>                  //		regs->orig_eax = -1;		/* disable syscall checks */
>  	}
>  
>  	{
>  		struct _fpstate * buf;
> -                buf = (void *)ldl(&sc->fpstate);
> +                buf = (void *)ldul(&sc->fpstate);
>  		if (buf) {
>  #if 0
>  			if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
> @@ -909,7 +909,7 @@ restore_sigcontext(CPUX86State *env, str
>  		}
>  	}
>  
> -        *peax = ldl(&sc->eax);
> +        *peax = ldul(&sc->eax);
>  	return err;
>  #if 0
>  badframe:
> Index: linux-user/vm86.c
> ===================================================================
> RCS file: /sources/qemu/qemu/linux-user/vm86.c,v
> retrieving revision 1.11
> diff -u -d -d -p -r1.11 vm86.c
> --- linux-user/vm86.c	17 Sep 2007 08:09:50 -0000	1.11
> +++ linux-user/vm86.c	16 Oct 2007 11:39:05 -0000
> @@ -56,7 +56,7 @@ static inline unsigned int vm_getw(uint8
>  
>  static inline unsigned int vm_getl(uint8_t *segptr, unsigned int reg16)
>  {
> -    return ldl(segptr + (reg16 & 0xffff));
> +    return ldul(segptr + (reg16 & 0xffff));
>  }
>  
>  void save_v86_state(CPUX86State *env)
> Index: target-alpha/exec.h
> ===================================================================
> RCS file: /sources/qemu/qemu/target-alpha/exec.h,v
> retrieving revision 1.4
> diff -u -d -d -p -r1.4 exec.h
> --- target-alpha/exec.h	14 Oct 2007 07:07:05 -0000	1.4
> +++ target-alpha/exec.h	16 Oct 2007 11:39:05 -0000
> @@ -62,6 +62,9 @@ register uint64_t T2 asm(AREG3);
>  
>  #if !defined(CONFIG_USER_ONLY)
>  #include "softmmu_exec.h"
> +#define REVERSE_ENDIAN
> +#include "softmmu_exec.h"
> +#undef REVERSE_ENDIAN
>  #endif /* !defined(CONFIG_USER_ONLY) */
>  
>  static inline void env_to_regs(void)
> Index: target-alpha/helper.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-alpha/helper.c,v
> retrieving revision 1.5
> diff -u -d -d -p -r1.5 helper.c
> --- target-alpha/helper.c	14 Oct 2007 07:07:05 -0000	1.5
> +++ target-alpha/helper.c	16 Oct 2007 11:39:05 -0000
> @@ -69,7 +69,7 @@ int cpu_alpha_handle_mmu_fault (CPUState
>              env->exception_index = EXCP_DTB_MISS_PAL;
>          else
>              env->exception_index = EXCP_DTB_MISS_NATIVE;
> -        opc = (ldl_code(env->pc) >> 21) << 4;
> +        opc = (ldul_code(env->pc) >> 21) << 4;
>          if (rw) {
>              opc |= 0x9;
>          } else {
> @@ -108,7 +108,7 @@ int cpu_alpha_mfpr (CPUState *env, int i
>          if (env->features & FEATURE_SPS)
>              *valp = env->ipr[IPR_ESP];
>          else
> -            *valp = ldq_raw(hwpcb + 8);
> +            *valp = lduq_raw(hwpcb + 8);
>          break;
>      case IPR_FEN:
>          *valp = ((int64_t)(env->ipr[IPR_FEN] << 63)) >> 63;
> @@ -127,7 +127,7 @@ int cpu_alpha_mfpr (CPUState *env, int i
>              if (env->features & FEATURE_SPS)
>                  *valp = env->ipr[IPR_KSP];
>              else
> -                *valp = ldq_raw(hwpcb + 0);
> +                *valp = lduq_raw(hwpcb + 0);
>          }
>          break;
>      case IPR_MCES:
> @@ -159,7 +159,7 @@ int cpu_alpha_mfpr (CPUState *env, int i
>          if (env->features & FEATURE_SPS)
>              *valp = env->ipr[IPR_SSP];
>          else
> -            *valp = ldq_raw(hwpcb + 16);
> +            *valp = lduq_raw(hwpcb + 16);
>          break;
>      case IPR_SYSPTBR:
>          if (env->features & FEATURE_VIRBND)
> @@ -200,7 +200,7 @@ int cpu_alpha_mfpr (CPUState *env, int i
>          if (env->features & FEATURE_SPS)
>              *valp = env->ipr[IPR_USP];
>          else
> -            *valp = ldq_raw(hwpcb + 24);
> +            *valp = lduq_raw(hwpcb + 24);
>          break;
>      case IPR_VIRBND:
>          if (env->features & FEATURE_VIRBND)
> @@ -255,7 +255,7 @@ int cpu_alpha_mtpr (CPUState *env, int i
>      case IPR_DATFX:
>          env->ipr[IPR_DATFX] &= ~0x1;
>          env->ipr[IPR_DATFX] |= val & 1;
> -        tmp64 = ldq_raw(hwpcb + 56);
> +        tmp64 = lduq_raw(hwpcb + 56);
>          tmp64 &= ~0x8000000000000000ULL;
>          tmp64 |= (val & 1) << 63;
>          stq_raw(hwpcb + 56, tmp64);
> @@ -268,7 +268,7 @@ int cpu_alpha_mtpr (CPUState *env, int i
>          break;
>      case IPR_FEN:
>          env->ipr[IPR_FEN] = val & 1;
> -        tmp64 = ldq_raw(hwpcb + 56);
> +        tmp64 = lduq_raw(hwpcb + 56);
>          tmp64 &= ~1;
>          tmp64 |= val & 1;
>          stq_raw(hwpcb + 56, tmp64);
> @@ -438,7 +438,7 @@ void cpu_dump_state (CPUState *env, FILE
>                  *((uint64_t *)(&env->ft0)), *((uint64_t *)(&env->ft1)),
>                  *((uint64_t *)(&env->ft2)));
>      cpu_fprintf(f, "\nMEM " TARGET_FMT_lx " %d %d\n",
> -                ldq_raw(0x000000004007df60ULL),
> +                lduq_raw(0x000000004007df60ULL),
>                  (uint8_t *)(&env->ft0), (uint8_t *)(&env->fir[0]));
>  }
>  
> Index: target-alpha/op_helper.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-alpha/op_helper.c,v
> retrieving revision 1.4
> diff -u -d -d -p -r1.4 op_helper.c
> --- target-alpha/op_helper.c	14 Oct 2007 08:18:12 -0000	1.4
> +++ target-alpha/op_helper.c	16 Oct 2007 11:39:05 -0000
> @@ -1213,6 +1213,21 @@ void helper_st_phys_to_virt (void)
>  
>  #define MMUSUFFIX _mmu
>  
> +/* Native-endian */
> +#define SHIFT 0
> +#include "softmmu_template.h"
> +
> +#define SHIFT 1
> +#include "softmmu_template.h"
> +
> +#define SHIFT 2
> +#include "softmmu_template.h"
> +
> +#define SHIFT 3
> +#include "softmmu_template.h"
> +
> +/* Reverse-endian */
> +#define REVERSE_ENDIAN
>  #define SHIFT 0
>  #include "softmmu_template.h"
>  
> @@ -1224,6 +1239,7 @@ void helper_st_phys_to_virt (void)
>  
>  #define SHIFT 3
>  #include "softmmu_template.h"
> +#undef REVERSE_ENDIAN
>  
>  /* try to fill the TLB and return an exception if error. If retaddr is
>     NULL, it means that the function was called in C code (i.e. not
> Index: target-alpha/op_mem.h
> ===================================================================
> RCS file: /sources/qemu/qemu/target-alpha/op_mem.h,v
> retrieving revision 1.2
> diff -u -d -d -p -r1.2 op_mem.h
> --- target-alpha/op_mem.h	16 Sep 2007 21:08:01 -0000	1.2
> +++ target-alpha/op_mem.h	16 Oct 2007 11:39:05 -0000
> @@ -30,14 +30,14 @@ static inline uint32_t glue(ldl_l, MEMSU
>  {
>      env->lock = EA;
>  
> -    return glue(ldl, MEMSUFFIX)(EA);
> +    return glue(ldul, MEMSUFFIX)(EA);
>  }
>  
>  static inline uint32_t glue(ldq_l, MEMSUFFIX) (target_ulong EA)
>  {
>      env->lock = EA;
>  
> -    return glue(ldq, MEMSUFFIX)(EA);
> +    return glue(lduq, MEMSUFFIX)(EA);
>  }
>  
>  static inline void glue(stl_c, MEMSUFFIX) (target_ulong EA, uint32_t data)
> @@ -82,12 +82,12 @@ ALPHA_LD_OP(bu, ldub);
>  ALPHA_ST_OP(b, stb);
>  ALPHA_LD_OP(wu, lduw);
>  ALPHA_ST_OP(w, stw);
> -ALPHA_LD_OP(l, ldl);
> +ALPHA_LD_OP(l, ldul);
>  ALPHA_ST_OP(l, stl);
> -ALPHA_LD_OP(q, ldq);
> +ALPHA_LD_OP(q, lduq);
>  ALPHA_ST_OP(q, stq);
>  
> -ALPHA_LD_OP(q_u, ldq);
> +ALPHA_LD_OP(q_u, lduq);
>  ALPHA_ST_OP(q_u, stq);
>  
>  ALPHA_LD_OP(l_l, ldl_l);
> Index: target-alpha/translate.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-alpha/translate.c,v
> retrieving revision 1.6
> diff -u -d -d -p -r1.6 translate.c
> --- target-alpha/translate.c	14 Oct 2007 08:50:17 -0000	1.6
> +++ target-alpha/translate.c	16 Oct 2007 11:39:05 -0000
> @@ -2010,7 +2010,7 @@ int gen_intermediate_code_internal (CPUS
>                      ctx.pc, ctx.mem_idx);
>          }
>  #endif
> -        insn = ldl_code(ctx.pc);
> +        insn = ldul_code(ctx.pc);
>  #if defined ALPHA_DEBUG_DISAS
>          insn_count++;
>          if (logfile != NULL) {
> Index: target-arm/exec.h
> ===================================================================
> RCS file: /sources/qemu/qemu/target-arm/exec.h,v
> retrieving revision 1.14
> diff -u -d -d -p -r1.14 exec.h
> --- target-arm/exec.h	14 Oct 2007 07:07:05 -0000	1.14
> +++ target-arm/exec.h	16 Oct 2007 11:39:05 -0000
> @@ -64,6 +64,9 @@ static inline int cpu_halted(CPUState *e
>  
>  #if !defined(CONFIG_USER_ONLY)
>  #include "softmmu_exec.h"
> +#define REVERSE_ENDIAN
> +#include "softmmu_exec.h"
> +#undef REVERSE_ENDIAN
>  #endif
>  
>  /* In op_helper.c */
> Index: target-arm/helper.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-arm/helper.c,v
> retrieving revision 1.23
> diff -u -d -d -p -r1.23 helper.c
> --- target-arm/helper.c	14 Oct 2007 07:07:05 -0000	1.23
> +++ target-arm/helper.c	16 Oct 2007 11:39:05 -0000
> @@ -297,7 +297,7 @@ void do_interrupt(CPUARMState *env)
>              if (env->thumb) {
>                  mask = lduw_code(env->regs[15] - 2) & 0xff;
>              } else {
> -                mask = ldl_code(env->regs[15] - 4) & 0xffffff;
> +                mask = ldul_code(env->regs[15] - 4) & 0xffffff;
>              }
>              /* Only intercept calls from privileged modes, to provide some
>                 semblance of security.  */
> @@ -473,7 +473,7 @@ static int get_phys_addr(CPUState *env, 
>          /* Pagetable walk.  */
>          /* Lookup l1 descriptor.  */
>          table = (env->cp15.c2_base & 0xffffc000) | ((address >> 18) & 0x3ffc);
> -        desc = ldl_phys(table);
> +        desc = ldul_phys(table);
>          type = (desc & 3);
>          domain = (env->cp15.c3 >> ((desc >> 4) & 0x1e)) & 3;
>          if (type == 0) {
> @@ -502,7 +502,7 @@ static int get_phys_addr(CPUState *env, 
>                  /* Fine pagetable.  */
>                  table = (desc & 0xfffff000) | ((address >> 8) & 0xffc);
>              }
> -            desc = ldl_phys(table);
> +            desc = ldul_phys(table);
>              switch (desc & 3) {
>              case 0: /* Page translation fault.  */
>                  code = 7;
> Index: target-arm/op_helper.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-arm/op_helper.c,v
> retrieving revision 1.7
> diff -u -d -d -p -r1.7 op_helper.c
> --- target-arm/op_helper.c	14 Oct 2007 07:07:05 -0000	1.7
> +++ target-arm/op_helper.c	16 Oct 2007 11:39:05 -0000
> @@ -180,6 +180,21 @@ void do_vfp_get_fpscr(void)
>  #define MMUSUFFIX _mmu
>  #define GETPC() (__builtin_return_address(0))
>  
> +/* Native-endian */
> +#define SHIFT 0
> +#include "softmmu_template.h"
> +
> +#define SHIFT 1
> +#include "softmmu_template.h"
> +
> +#define SHIFT 2
> +#include "softmmu_template.h"
> +
> +#define SHIFT 3
> +#include "softmmu_template.h"
> +
> +/* Reverse-endian */
> +#define REVERSE_ENDIAN
>  #define SHIFT 0
>  #include "softmmu_template.h"
>  
> @@ -191,6 +206,7 @@ void do_vfp_get_fpscr(void)
>  
>  #define SHIFT 3
>  #include "softmmu_template.h"
> +#undef REVERSE_ENDIAN
>  
>  /* try to fill the TLB and return an exception if error. If retaddr is
>     NULL, it means that the function was called in C code (i.e. not
> Index: target-arm/op_mem.h
> ===================================================================
> RCS file: /sources/qemu/qemu/target-arm/op_mem.h,v
> retrieving revision 1.2
> diff -u -d -d -p -r1.2 op_mem.h
> --- target-arm/op_mem.h	30 Apr 2007 02:02:16 -0000	1.2
> +++ target-arm/op_mem.h	16 Oct 2007 11:39:05 -0000
> @@ -1,18 +1,17 @@
>  /* ARM memory operations.  */
>  
> -/* Load from address T1 into T0.  */
> -#define MEM_LD_OP(name) \
> +#define MEM_LD_OP(name, lname) \
>  void OPPROTO glue(op_ld##name,MEMSUFFIX)(void) \
>  { \
> -    T0 = glue(ld##name,MEMSUFFIX)(T1); \
> +    T0 = glue(ld##lname,MEMSUFFIX)(T1); \
>      FORCE_RET(); \
>  }
>  
> -MEM_LD_OP(ub)
> -MEM_LD_OP(sb)
> -MEM_LD_OP(uw)
> -MEM_LD_OP(sw)
> -MEM_LD_OP(l)
> +MEM_LD_OP(ub,ub)
> +MEM_LD_OP(sb,sb)
> +MEM_LD_OP(uw,uw)
> +MEM_LD_OP(sw,sw)
> +MEM_LD_OP(l,ul)
>  
>  #undef MEM_LD_OP
>  
> @@ -45,7 +44,7 @@ void OPPROTO glue(op_swp##name,MEMSUFFIX
>  }
>  
>  MEM_SWP_OP(b, ub)
> -MEM_SWP_OP(l, l)
> +MEM_SWP_OP(l, ul)
>  
>  #undef MEM_SWP_OP
>  
> @@ -82,8 +81,8 @@ void OPPROTO glue(op_iwmmxt_st##name,MEM
>  
>  MMX_MEM_OP(b, ub)
>  MMX_MEM_OP(w, uw)
> -MMX_MEM_OP(l, l)
> -MMX_MEM_OP(q, q)
> +MMX_MEM_OP(l, ul)
> +MMX_MEM_OP(q, uq)
>  
>  #undef MMX_MEM_OP
>  
> Index: target-arm/translate.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-arm/translate.c,v
> retrieving revision 1.57
> diff -u -d -d -p -r1.57 translate.c
> --- target-arm/translate.c	17 Sep 2007 08:09:51 -0000	1.57
> +++ target-arm/translate.c	16 Oct 2007 11:39:05 -0000
> @@ -2206,7 +2206,7 @@ static void disas_arm_insn(CPUState * en
>  {
>      unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
>  
> -    insn = ldl_code(s->pc);
> +    insn = ldul_code(s->pc);
>      s->pc += 4;
>  
>      cond = insn >> 28;
> Index: target-cris/exec.h
> ===================================================================
> RCS file: /sources/qemu/qemu/target-cris/exec.h,v
> retrieving revision 1.2
> diff -u -d -d -p -r1.2 exec.h
> --- target-cris/exec.h	14 Oct 2007 07:07:06 -0000	1.2
> +++ target-cris/exec.h	16 Oct 2007 11:39:06 -0000
> @@ -50,6 +50,9 @@ void tlb_fill (target_ulong addr, int is
>  
>  #if !defined(CONFIG_USER_ONLY)
>  #include "softmmu_exec.h"
> +#define REVERSE_ENDIAN
> +#include "softmmu_exec.h"
> +#undef REVERSE_ENDIAN
>  #endif
>  
>  void cpu_cris_flush_flags(CPUCRISState *env, int cc_op);
> Index: target-cris/helper.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-cris/helper.c,v
> retrieving revision 1.2
> diff -u -d -d -p -r1.2 helper.c
> --- target-cris/helper.c	14 Oct 2007 07:07:06 -0000	1.2
> +++ target-cris/helper.c	16 Oct 2007 11:39:06 -0000
> @@ -106,7 +106,7 @@ void do_interrupt(CPUState *env)
>  //			printf ("BREAK! %d\n", env->trapnr);
>  			irqnum = env->trapnr;
>  			ebp = env->pregs[SR_EBP];
> -			isr = ldl_code(ebp + irqnum * 4);
> +			isr = ldul_code(ebp + irqnum * 4);
>  			env->pregs[SR_ERP] = env->pc + 2;
>  			env->pc = isr;
>  
> @@ -117,7 +117,7 @@ void do_interrupt(CPUState *env)
>  //			printf ("MMU miss\n");
>  			irqnum = 4;
>  			ebp = env->pregs[SR_EBP];
> -			isr = ldl_code(ebp + irqnum * 4);
> +			isr = ldul_code(ebp + irqnum * 4);
>  			env->pregs[SR_ERP] = env->pc;
>  			env->pc = isr;
>  			cris_shift_ccs(env);
> @@ -138,7 +138,7 @@ void do_interrupt(CPUState *env)
>  					__builtin_clz(env->pending_interrupts);
>  				irqnum += 0x30;
>  				ebp = env->pregs[SR_EBP];
> -				isr = ldl_code(ebp + irqnum * 4);
> +				isr = ldul_code(ebp + irqnum * 4);
>  				env->pregs[SR_ERP] = env->pc;
>  				env->pc = isr;
>  
> Index: target-cris/op_helper.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-cris/op_helper.c,v
> retrieving revision 1.2
> diff -u -d -d -p -r1.2 op_helper.c
> --- target-cris/op_helper.c	14 Oct 2007 07:07:06 -0000	1.2
> +++ target-cris/op_helper.c	16 Oct 2007 11:39:06 -0000
> @@ -25,6 +25,21 @@
>  #define MMUSUFFIX _mmu
>  #define GETPC() (__builtin_return_address(0))
>  
> +/* Native-endian */
> +#define SHIFT 0
> +#include "softmmu_template.h"
> +
> +#define SHIFT 1
> +#include "softmmu_template.h"
> +
> +#define SHIFT 2
> +#include "softmmu_template.h"
> +
> +#define SHIFT 3
> +#include "softmmu_template.h"
> +
> +/* Reverse-endian */
> +#define REVERSE_ENDIAN
>  #define SHIFT 0
>  #include "softmmu_template.h"
>  
> @@ -36,6 +51,7 @@
>  
>  #define SHIFT 3
>  #include "softmmu_template.h"
> +#undef REVERSE_ENDIAN
>  
>  /* Try to fill the TLB and return an exception if error. If retaddr is
>     NULL, it means that the function was called in C code (i.e. not
> Index: target-cris/op_mem.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-cris/op_mem.c,v
> retrieving revision 1.1
> diff -u -d -d -p -r1.1 op_mem.c
> --- target-cris/op_mem.c	8 Oct 2007 13:04:02 -0000	1.1
> +++ target-cris/op_mem.c	16 Oct 2007 11:39:06 -0000
> @@ -49,7 +49,7 @@ void glue(op_stw_T0_T1, MEMSUFFIX) (void
>  }
>  
>  void glue(op_ldl_T0_T0, MEMSUFFIX) (void) {
> -    T0 = glue(ldl, MEMSUFFIX) (T0);
> +    T0 = glue(ldul, MEMSUFFIX) (T0);
>      RETURN();
>  }
>  
> Index: target-cris/translate.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-cris/translate.c,v
> retrieving revision 1.1
> diff -u -d -d -p -r1.1 translate.c
> --- target-cris/translate.c	8 Oct 2007 12:49:08 -0000	1.1
> +++ target-cris/translate.c	16 Oct 2007 11:39:06 -0000
> @@ -828,7 +828,7 @@ static int dec_prep_alu_m(DisasContext *
>  		if (memsize == 1)
>  			insn_len++;
>  
> -		imm = ldl_code(dc->pc + 2);
> +		imm = ldul_code(dc->pc + 2);
>  		if (memsize != 4) {
>  			if (s_ext) {
>  				imm = sign_extend(imm, (memsize * 8) - 1);
> @@ -1962,7 +1962,7 @@ static unsigned int dec_lapc_im(DisasCon
>  	rd = dc->op2;
>  
>  	cris_cc_mask(dc, 0);
> -	imm = ldl_code(dc->pc + 2);
> +	imm = ldul_code(dc->pc + 2);
>  	DIS(fprintf (logfile, "lapc 0x%x, $r%u\n", imm + dc->pc, dc->op2));
>  	gen_op_movl_T0_im (dc->pc + imm);
>  	gen_movl_reg_T0[rd] ();
> @@ -1999,7 +1999,7 @@ static unsigned int dec_jas_im(DisasCont
>  {
>  	uint32_t imm;
>  
> -	imm = ldl_code(dc->pc + 2);
> +	imm = ldul_code(dc->pc + 2);
>  
>  	DIS(fprintf (logfile, "jas 0x%x\n", imm));
>  	cris_cc_mask(dc, 0);
> @@ -2016,7 +2016,7 @@ static unsigned int dec_jasc_im(DisasCon
>  {
>  	uint32_t imm;
>  
> -	imm = ldl_code(dc->pc + 2);
> +	imm = ldul_code(dc->pc + 2);
>  
>  	DIS(fprintf (logfile, "jasc 0x%x\n", imm));
>  	cris_cc_mask(dc, 0);
> @@ -2047,7 +2047,7 @@ static unsigned int dec_bcc_im(DisasCont
>  	int32_t offset;
>  	uint32_t cond = dc->op2;
>  
> -	offset = ldl_code(dc->pc + 2);
> +	offset = ldul_code(dc->pc + 2);
>  	offset = sign_extend(offset, 15);
>  
>  	DIS(fprintf (logfile, "b%s %d pc=%x dst=%x\n",
> @@ -2065,7 +2065,7 @@ static unsigned int dec_bas_im(DisasCont
>  	int32_t simm;
>  
>  
> -	simm = ldl_code(dc->pc + 2);
> +	simm = ldul_code(dc->pc + 2);
>  
>  	DIS(fprintf (logfile, "bas 0x%x, $p%u\n", dc->pc + simm, dc->op2));
>  	cris_cc_mask(dc, 0);
> @@ -2081,7 +2081,7 @@ static unsigned int dec_bas_im(DisasCont
>  static unsigned int dec_basc_im(DisasContext *dc)
>  {
>  	int32_t simm;
> -	simm = ldl_code(dc->pc + 2);
> +	simm = ldul_code(dc->pc + 2);
>  
>  	DIS(fprintf (logfile, "basc 0x%x, $p%u\n", dc->pc + simm, dc->op2));
>  	cris_cc_mask(dc, 0);
> @@ -2259,7 +2259,7 @@ cris_decoder(DisasContext *dc)
>  	int i;
>  
>  	/* Load a halfword onto the instruction register.  */
> -	tmp = ldl_code(dc->pc);
> +	tmp = ldul_code(dc->pc);
>  	dc->ir = tmp & 0xffff;
>  
>  	/* Now decode it.  */
> Index: target-i386/exec.h
> ===================================================================
> RCS file: /sources/qemu/qemu/target-i386/exec.h,v
> retrieving revision 1.38
> diff -u -d -d -p -r1.38 exec.h
> --- target-i386/exec.h	14 Oct 2007 07:07:06 -0000	1.38
> +++ target-i386/exec.h	16 Oct 2007 11:39:06 -0000
> @@ -217,6 +217,9 @@ void check_iol_DX(void);
>  #if !defined(CONFIG_USER_ONLY)
>  
>  #include "softmmu_exec.h"
> +#define REVERSE_ENDIAN
> +#include "softmmu_exec.h"
> +#undef REVERSE_ENDIAN
>  
>  static inline double ldfq(target_ulong ptr)
>  {
> @@ -224,7 +227,7 @@ static inline double ldfq(target_ulong p
>          double d;
>          uint64_t i;
>      } u;
> -    u.i = ldq(ptr);
> +    u.i = lduq(ptr);
>      return u.d;
>  }
>  
> @@ -244,7 +247,7 @@ static inline float ldfl(target_ulong pt
>          float f;
>          uint32_t i;
>      } u;
> -    u.i = ldl(ptr);
> +    u.i = ldul(ptr);
>      return u.f;
>  }
>  
> @@ -388,7 +391,7 @@ static inline CPU86_LDouble helper_fldt(
>      /* XXX: handle overflow ? */
>      e = (upper & 0x7fff) - 16383 + EXPBIAS; /* exponent */
>      e |= (upper >> 4) & 0x800; /* sign */
> -    ll = (ldq(ptr) >> 11) & ((1LL << 52) - 1);
> +    ll = (lduq(ptr) >> 11) & ((1LL << 52) - 1);
>  #ifdef __arm__
>      temp.l.upper = (e << 20) | (ll >> 32);
>      temp.l.lower = ll;
> @@ -419,12 +422,12 @@ static inline void helper_fstt(CPU86_LDo
>  
>  static inline CPU86_LDouble helper_fldt(target_ulong ptr)
>  {
> -    return *(CPU86_LDouble *)ptr;
> +    return *(CPU86_LDouble *)(unsigned long)ptr;
>  }
>  
>  static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr)
>  {
> -    *(CPU86_LDouble *)ptr = f;
> +    *(CPU86_LDouble *)(unsigned long)ptr = f;
>  }
>  
>  #else
> @@ -435,7 +438,7 @@ static inline CPU86_LDouble helper_fldt(
>  {
>      CPU86_LDoubleU temp;
>  
> -    temp.l.lower = ldq(ptr);
> +    temp.l.lower = lduq(ptr);
>      temp.l.upper = lduw(ptr + 8);
>      return temp.d;
>  }
> Index: target-i386/helper.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-i386/helper.c,v
> retrieving revision 1.90
> diff -u -d -d -p -r1.90 helper.c
> --- target-i386/helper.c	14 Oct 2007 07:07:06 -0000	1.90
> +++ target-i386/helper.c	16 Oct 2007 11:39:06 -0000
> @@ -122,8 +122,8 @@ static inline int load_segment(uint32_t 
>      if ((index + 7) > dt->limit)
>          return -1;
>      ptr = dt->base + index;
> -    *e1_ptr = ldl_kernel(ptr);
> -    *e2_ptr = ldl_kernel(ptr + 4);
> +    *e1_ptr = ldul_kernel(ptr);
> +    *e2_ptr = ldul_kernel(ptr + 4);
>      return 0;
>  }
>  
> @@ -186,7 +186,7 @@ static inline void get_ss_esp_from_tss(u
>          *esp_ptr = lduw_kernel(env->tr.base + index);
>          *ss_ptr = lduw_kernel(env->tr.base + index + 2);
>      } else {
> -        *esp_ptr = ldl_kernel(env->tr.base + index);
> +        *esp_ptr = ldul_kernel(env->tr.base + index);
>          *ss_ptr = lduw_kernel(env->tr.base + index + 4);
>      }
>  }
> @@ -302,15 +302,15 @@ static void switch_tss(int tss_selector,
>      /* read all the registers from the new TSS */
>      if (type & 8) {
>          /* 32 bit */
> -        new_cr3 = ldl_kernel(tss_base + 0x1c);
> -        new_eip = ldl_kernel(tss_base + 0x20);
> -        new_eflags = ldl_kernel(tss_base + 0x24);
> +        new_cr3 = ldul_kernel(tss_base + 0x1c);
> +        new_eip = ldul_kernel(tss_base + 0x20);
> +        new_eflags = ldul_kernel(tss_base + 0x24);
>          for(i = 0; i < 8; i++)
> -            new_regs[i] = ldl_kernel(tss_base + (0x28 + i * 4));
> +            new_regs[i] = ldul_kernel(tss_base + (0x28 + i * 4));
>          for(i = 0; i < 6; i++)
>              new_segs[i] = lduw_kernel(tss_base + (0x48 + i * 4));
>          new_ldt = lduw_kernel(tss_base + 0x60);
> -        new_trap = ldl_kernel(tss_base + 0x64);
> +        new_trap = ldul_kernel(tss_base + 0x64);
>      } else {
>          /* 16 bit */
>          new_cr3 = 0;
> @@ -341,7 +341,7 @@ static void switch_tss(int tss_selector,
>          target_ulong ptr;
>          uint32_t e2;
>          ptr = env->gdt.base + (env->tr.selector & ~7);
> -        e2 = ldl_kernel(ptr + 4);
> +        e2 = ldul_kernel(ptr + 4);
>          e2 &= ~DESC_TSS_BUSY_MASK;
>          stl_kernel(ptr + 4, e2);
>      }
> @@ -393,7 +393,7 @@ static void switch_tss(int tss_selector,
>          target_ulong ptr;
>          uint32_t e2;
>          ptr = env->gdt.base + (tss_selector & ~7);
> -        e2 = ldl_kernel(ptr + 4);
> +        e2 = ldul_kernel(ptr + 4);
>          e2 |= DESC_TSS_BUSY_MASK;
>          stl_kernel(ptr + 4, e2);
>      }
> @@ -456,8 +456,8 @@ static void switch_tss(int tss_selector,
>          if ((index + 7) > dt->limit)
>              raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
>          ptr = dt->base + index;
> -        e1 = ldl_kernel(ptr);
> -        e2 = ldl_kernel(ptr + 4);
> +        e1 = ldul_kernel(ptr);
> +        e2 = ldul_kernel(ptr + 4);
>          if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
>              raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
>          if (!(e2 & DESC_P_MASK))
> @@ -580,7 +580,7 @@ do {\
>  
>  #define POPL(ssp, sp, sp_mask, val)\
>  {\
> -    val = (uint32_t)ldl_kernel((ssp) + (sp & (sp_mask)));\
> +    val = (uint32_t)ldul_kernel((ssp) + (sp & (sp_mask)));\
>      sp += 4;\
>  }
>  
> @@ -629,8 +629,8 @@ static void do_interrupt_protected(int i
>      if (intno * 8 + 7 > dt->limit)
>          raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
>      ptr = dt->base + intno * 8;
> -    e1 = ldl_kernel(ptr);
> -    e2 = ldl_kernel(ptr + 4);
> +    e1 = ldul_kernel(ptr);
> +    e2 = ldul_kernel(ptr + 4);
>      /* check gate type */
>      type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
>      switch(type) {
> @@ -810,7 +810,7 @@ static void do_interrupt_protected(int i
>  
>  #define POPQ(sp, val)\
>  {\
> -    val = ldq_kernel(sp);\
> +    val = lduq_kernel(sp);\
>      sp += 8;\
>  }
>  
> @@ -828,7 +828,7 @@ static inline target_ulong get_rsp_from_
>      index = 8 * level + 4;
>      if ((index + 7) > env->tr.limit)
>          raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc);
> -    return ldq_kernel(env->tr.base + index);
> +    return lduq_kernel(env->tr.base + index);
>  }
>  
>  /* 64 bit interrupt */
> @@ -875,9 +875,9 @@ static void do_interrupt64(int intno, in
>      if (intno * 16 + 15 > dt->limit)
>          raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
>      ptr = dt->base + intno * 16;
> -    e1 = ldl_kernel(ptr);
> -    e2 = ldl_kernel(ptr + 4);
> -    e3 = ldl_kernel(ptr + 8);
> +    e1 = ldul_kernel(ptr);
> +    e2 = ldul_kernel(ptr + 4);
> +    e3 = ldul_kernel(ptr + 8);
>      /* check gate type */
>      type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
>      switch(type) {
> @@ -1147,7 +1147,7 @@ void do_interrupt_user(int intno, int is
>  
>      dt = &env->idt;
>      ptr = dt->base + (intno * 8);
> -    e2 = ldl_kernel(ptr + 4);
> +    e2 = ldul_kernel(ptr + 4);
>  
>      dpl = (e2 >> DESC_DPL_SHIFT) & 3;
>      cpl = env->hflags & HF_CPL_MASK;
> @@ -1458,7 +1458,7 @@ void helper_rsm(void)
>  
>      sm_state = env->smbase + 0x8000;
>  #ifdef TARGET_X86_64
> -    env->efer = ldq_phys(sm_state + 0x7ed0);
> +    env->efer = lduq_phys(sm_state + 0x7ed0);
>      if (env->efer & MSR_EFER_LMA)
>          env->hflags |= HF_LMA_MASK;
>      else
> @@ -1468,83 +1468,83 @@ void helper_rsm(void)
>          offset = 0x7e00 + i * 16;
>          cpu_x86_load_seg_cache(env, i,
>                                 lduw_phys(sm_state + offset),
> -                               ldq_phys(sm_state + offset + 8),
> -                               ldl_phys(sm_state + offset + 4),
> +                               lduq_phys(sm_state + offset + 8),
> +                               ldul_phys(sm_state + offset + 4),
>                                 (lduw_phys(sm_state + offset + 2) & 0xf0ff) << 8);
>      }
>  
> -    env->gdt.base = ldq_phys(sm_state + 0x7e68);
> -    env->gdt.limit = ldl_phys(sm_state + 0x7e64);
> +    env->gdt.base = lduq_phys(sm_state + 0x7e68);
> +    env->gdt.limit = ldul_phys(sm_state + 0x7e64);
>  
>      env->ldt.selector = lduw_phys(sm_state + 0x7e70);
> -    env->ldt.base = ldq_phys(sm_state + 0x7e78);
> -    env->ldt.limit = ldl_phys(sm_state + 0x7e74);
> +    env->ldt.base = lduq_phys(sm_state + 0x7e78);
> +    env->ldt.limit = ldul_phys(sm_state + 0x7e74);
>      env->ldt.flags = (lduw_phys(sm_state + 0x7e72) & 0xf0ff) << 8;
>  
> -    env->idt.base = ldq_phys(sm_state + 0x7e88);
> -    env->idt.limit = ldl_phys(sm_state + 0x7e84);
> +    env->idt.base = lduq_phys(sm_state + 0x7e88);
> +    env->idt.limit = ldul_phys(sm_state + 0x7e84);
>  
>      env->tr.selector = lduw_phys(sm_state + 0x7e90);
> -    env->tr.base = ldq_phys(sm_state + 0x7e98);
> -    env->tr.limit = ldl_phys(sm_state + 0x7e94);
> +    env->tr.base = lduq_phys(sm_state + 0x7e98);
> +    env->tr.limit = ldul_phys(sm_state + 0x7e94);
>      env->tr.flags = (lduw_phys(sm_state + 0x7e92) & 0xf0ff) << 8;
>  
> -    EAX = ldq_phys(sm_state + 0x7ff8);
> -    ECX = ldq_phys(sm_state + 0x7ff0);
> -    EDX = ldq_phys(sm_state + 0x7fe8);
> -    EBX = ldq_phys(sm_state + 0x7fe0);
> -    ESP = ldq_phys(sm_state + 0x7fd8);
> -    EBP = ldq_phys(sm_state + 0x7fd0);
> -    ESI = ldq_phys(sm_state + 0x7fc8);
> -    EDI = ldq_phys(sm_state + 0x7fc0);
> +    EAX = lduq_phys(sm_state + 0x7ff8);
> +    ECX = lduq_phys(sm_state + 0x7ff0);
> +    EDX = lduq_phys(sm_state + 0x7fe8);
> +    EBX = lduq_phys(sm_state + 0x7fe0);
> +    ESP = lduq_phys(sm_state + 0x7fd8);
> +    EBP = lduq_phys(sm_state + 0x7fd0);
> +    ESI = lduq_phys(sm_state + 0x7fc8);
> +    EDI = lduq_phys(sm_state + 0x7fc0);
>      for(i = 8; i < 16; i++)
> -        env->regs[i] = ldq_phys(sm_state + 0x7ff8 - i * 8);
> -    env->eip = ldq_phys(sm_state + 0x7f78);
> -    load_eflags(ldl_phys(sm_state + 0x7f70),
> +        env->regs[i] = lduq_phys(sm_state + 0x7ff8 - i * 8);
> +    env->eip = lduq_phys(sm_state + 0x7f78);
> +    load_eflags(ldul_phys(sm_state + 0x7f70),
>                  ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
> -    env->dr[6] = ldl_phys(sm_state + 0x7f68);
> -    env->dr[7] = ldl_phys(sm_state + 0x7f60);
> +    env->dr[6] = ldul_phys(sm_state + 0x7f68);
> +    env->dr[7] = ldul_phys(sm_state + 0x7f60);
>  
> -    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f48));
> -    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7f50));
> -    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7f58));
> +    cpu_x86_update_cr4(env, ldul_phys(sm_state + 0x7f48));
> +    cpu_x86_update_cr3(env, ldul_phys(sm_state + 0x7f50));
> +    cpu_x86_update_cr0(env, ldul_phys(sm_state + 0x7f58));
>  
> -    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
> +    val = ldul_phys(sm_state + 0x7efc); /* revision ID */
>      if (val & 0x20000) {
> -        env->smbase = ldl_phys(sm_state + 0x7f00) & ~0x7fff;
> +        env->smbase = ldul_phys(sm_state + 0x7f00) & ~0x7fff;
>      }
>  #else
> -    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7ffc));
> -    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7ff8));
> -    load_eflags(ldl_phys(sm_state + 0x7ff4),
> +    cpu_x86_update_cr0(env, ldul_phys(sm_state + 0x7ffc));
> +    cpu_x86_update_cr3(env, ldul_phys(sm_state + 0x7ff8));
> +    load_eflags(ldul_phys(sm_state + 0x7ff4),
>                  ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
> -    env->eip = ldl_phys(sm_state + 0x7ff0);
> -    EDI = ldl_phys(sm_state + 0x7fec);
> -    ESI = ldl_phys(sm_state + 0x7fe8);
> -    EBP = ldl_phys(sm_state + 0x7fe4);
> -    ESP = ldl_phys(sm_state + 0x7fe0);
> -    EBX = ldl_phys(sm_state + 0x7fdc);
> -    EDX = ldl_phys(sm_state + 0x7fd8);
> -    ECX = ldl_phys(sm_state + 0x7fd4);
> -    EAX = ldl_phys(sm_state + 0x7fd0);
> -    env->dr[6] = ldl_phys(sm_state + 0x7fcc);
> -    env->dr[7] = ldl_phys(sm_state + 0x7fc8);
> +    env->eip = ldul_phys(sm_state + 0x7ff0);
> +    EDI = ldul_phys(sm_state + 0x7fec);
> +    ESI = ldul_phys(sm_state + 0x7fe8);
> +    EBP = ldul_phys(sm_state + 0x7fe4);
> +    ESP = ldul_phys(sm_state + 0x7fe0);
> +    EBX = ldul_phys(sm_state + 0x7fdc);
> +    EDX = ldul_phys(sm_state + 0x7fd8);
> +    ECX = ldul_phys(sm_state + 0x7fd4);
> +    EAX = ldul_phys(sm_state + 0x7fd0);
> +    env->dr[6] = ldul_phys(sm_state + 0x7fcc);
> +    env->dr[7] = ldul_phys(sm_state + 0x7fc8);
>  
> -    env->tr.selector = ldl_phys(sm_state + 0x7fc4) & 0xffff;
> -    env->tr.base = ldl_phys(sm_state + 0x7f64);
> -    env->tr.limit = ldl_phys(sm_state + 0x7f60);
> -    env->tr.flags = (ldl_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
> +    env->tr.selector = ldul_phys(sm_state + 0x7fc4) & 0xffff;
> +    env->tr.base = ldul_phys(sm_state + 0x7f64);
> +    env->tr.limit = ldul_phys(sm_state + 0x7f60);
> +    env->tr.flags = (ldul_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
>  
> -    env->ldt.selector = ldl_phys(sm_state + 0x7fc0) & 0xffff;
> -    env->ldt.base = ldl_phys(sm_state + 0x7f80);
> -    env->ldt.limit = ldl_phys(sm_state + 0x7f7c);
> -    env->ldt.flags = (ldl_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
> +    env->ldt.selector = ldul_phys(sm_state + 0x7fc0) & 0xffff;
> +    env->ldt.base = ldul_phys(sm_state + 0x7f80);
> +    env->ldt.limit = ldul_phys(sm_state + 0x7f7c);
> +    env->ldt.flags = (ldul_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
>  
> -    env->gdt.base = ldl_phys(sm_state + 0x7f74);
> -    env->gdt.limit = ldl_phys(sm_state + 0x7f70);
> +    env->gdt.base = ldul_phys(sm_state + 0x7f74);
> +    env->gdt.limit = ldul_phys(sm_state + 0x7f70);
>  
> -    env->idt.base = ldl_phys(sm_state + 0x7f58);
> -    env->idt.limit = ldl_phys(sm_state + 0x7f54);
> +    env->idt.base = ldul_phys(sm_state + 0x7f58);
> +    env->idt.limit = ldul_phys(sm_state + 0x7f54);
>  
>      for(i = 0; i < 6; i++) {
>          if (i < 3)
> @@ -1552,16 +1552,16 @@ void helper_rsm(void)
>          else
>              offset = 0x7f2c + (i - 3) * 12;
>          cpu_x86_load_seg_cache(env, i,
> -                               ldl_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
> -                               ldl_phys(sm_state + offset + 8),
> -                               ldl_phys(sm_state + offset + 4),
> -                               (ldl_phys(sm_state + offset) & 0xf0ff) << 8);
> +                               ldul_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
> +                               ldul_phys(sm_state + offset + 8),
> +                               ldul_phys(sm_state + offset + 4),
> +                               (ldul_phys(sm_state + offset) & 0xf0ff) << 8);
>      }
> -    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f14));
> +    cpu_x86_update_cr4(env, ldul_phys(sm_state + 0x7f14));
>  
> -    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
> +    val = ldul_phys(sm_state + 0x7efc); /* revision ID */
>      if (val & 0x20000) {
> -        env->smbase = ldl_phys(sm_state + 0x7ef8) & ~0x7fff;
> +        env->smbase = ldul_phys(sm_state + 0x7ef8) & ~0x7fff;
>      }
>  #endif
>      CC_OP = CC_OP_EFLAGS;
> @@ -1643,7 +1643,7 @@ void helper_cmpxchg8b(void)
>      int eflags;
>  
>      eflags = cc_table[CC_OP].compute_all();
> -    d = ldq(A0);
> +    d = lduq(A0);
>      if (d == (((uint64_t)EDX << 32) | EAX)) {
>          stq(A0, ((uint64_t)ECX << 32) | EBX);
>          eflags |= CC_Z;
> @@ -1761,7 +1761,7 @@ void helper_enter_level(int level, int d
>          while (--level) {
>              esp -= 4;
>              ebp -= 4;
> -            stl(ssp + (esp & esp_mask), ldl(ssp + (ebp & esp_mask)));
> +            stl(ssp + (esp & esp_mask), ldul(ssp + (ebp & esp_mask)));
>          }
>          esp -= 4;
>          stl(ssp + (esp & esp_mask), T1);
> @@ -1791,7 +1791,7 @@ void helper_enter64_level(int level, int
>          while (--level) {
>              esp -= 8;
>              ebp -= 8;
> -            stq(esp, ldq(ebp));
> +            stq(esp, lduq(ebp));
>          }
>          esp -= 8;
>          stq(esp, T1);
> @@ -1836,8 +1836,8 @@ void helper_lldt_T0(void)
>          if ((index + entry_limit) > dt->limit)
>              raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
>          ptr = dt->base + index;
> -        e1 = ldl_kernel(ptr);
> -        e2 = ldl_kernel(ptr + 4);
> +        e1 = ldul_kernel(ptr);
> +        e2 = ldul_kernel(ptr + 4);
>          if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
>              raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
>          if (!(e2 & DESC_P_MASK))
> @@ -1845,7 +1845,7 @@ void helper_lldt_T0(void)
>  #ifdef TARGET_X86_64
>          if (env->hflags & HF_LMA_MASK) {
>              uint32_t e3;
> -            e3 = ldl_kernel(ptr + 8);
> +            e3 = ldul_kernel(ptr + 8);
>              load_seg_cache_raw_dt(&env->ldt, e1, e2);
>              env->ldt.base |= (target_ulong)e3 << 32;
>          } else
> @@ -1885,8 +1885,8 @@ void helper_ltr_T0(void)
>          if ((index + entry_limit) > dt->limit)
>              raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
>          ptr = dt->base + index;
> -        e1 = ldl_kernel(ptr);
> -        e2 = ldl_kernel(ptr + 4);
> +        e1 = ldul_kernel(ptr);
> +        e2 = ldul_kernel(ptr + 4);
>          type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
>          if ((e2 & DESC_S_MASK) ||
>              (type != 1 && type != 9))
> @@ -1896,8 +1896,8 @@ void helper_ltr_T0(void)
>  #ifdef TARGET_X86_64
>          if (env->hflags & HF_LMA_MASK) {
>              uint32_t e3, e4;
> -            e3 = ldl_kernel(ptr + 8);
> -            e4 = ldl_kernel(ptr + 12);
> +            e3 = ldul_kernel(ptr + 8);
> +            e4 = ldul_kernel(ptr + 12);
>              if ((e4 >> DESC_TYPE_SHIFT) & 0xf)
>                  raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
>              load_seg_cache_raw_dt(&env->tr, e1, e2);
> @@ -1943,8 +1943,8 @@ void load_seg(int seg_reg, int selector)
>          if ((index + 7) > dt->limit)
>              raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
>          ptr = dt->base + index;
> -        e1 = ldl_kernel(ptr);
> -        e2 = ldl_kernel(ptr + 4);
> +        e1 = ldul_kernel(ptr);
> +        e2 = ldul_kernel(ptr + 4);
>  
>          if (!(e2 & DESC_S_MASK))
>              raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
> @@ -2273,7 +2273,7 @@ void helper_lcall_protected_T0_T1(int sh
>                  PUSHL(ssp, sp, sp_mask, env->segs[R_SS].selector);
>                  PUSHL(ssp, sp, sp_mask, ESP);
>                  for(i = param_count - 1; i >= 0; i--) {
> -                    val = ldl_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask));
> +                    val = ldul_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask));
>                      PUSHL(ssp, sp, sp_mask, val);
>                  }
>              } else {
> @@ -3569,13 +3569,13 @@ void helper_fxrstor(target_ulong ptr, in
>  
>      if (env->cr[4] & CR4_OSFXSR_MASK) {
>          /* XXX: finish it */
> -        env->mxcsr = ldl(ptr + 0x18);
> -        //ldl(ptr + 0x1c);
> +        env->mxcsr = ldul(ptr + 0x18);
> +        //ldul(ptr + 0x1c);
>          nb_xmm_regs = 8 << data64;
>          addr = ptr + 0xa0;
>          for(i = 0; i < nb_xmm_regs; i++) {
> -            env->xmm_regs[i].XMM_Q(0) = ldq(addr);
> -            env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8);
> +            env->xmm_regs[i].XMM_Q(0) = lduq(addr);
> +            env->xmm_regs[i].XMM_Q(1) = lduq(addr + 8);
>              addr += 16;
>          }
>      }
> @@ -3867,6 +3867,21 @@ void update_fp_status(void)
>  #define MMUSUFFIX _mmu
>  #define GETPC() (__builtin_return_address(0))
>  
> +/* Native-endian */
> +#define SHIFT 0
> +#include "softmmu_template.h"
> +
> +#define SHIFT 1
> +#include "softmmu_template.h"
> +
> +#define SHIFT 2
> +#include "softmmu_template.h"
> +
> +#define SHIFT 3
> +#include "softmmu_template.h"
> +
> +/* Reverse-endian */
> +#define REVERSE_ENDIAN
>  #define SHIFT 0
>  #include "softmmu_template.h"
>  
> @@ -3878,6 +3893,7 @@ void update_fp_status(void)
>  
>  #define SHIFT 3
>  #include "softmmu_template.h"
> +#undef REVERSE_ENDIAN
>  
>  #endif
>  
> @@ -4005,27 +4021,27 @@ void helper_vmrun(target_ulong addr)
>         vmcb in svm mode */
>      /* We shift all the intercept bits so we can OR them with the TB
>         flags later on */
> -    env->intercept            = (ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept)) << INTERCEPT_INTR) | INTERCEPT_SVM_MASK;
> +    env->intercept            = (lduq_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept)) << INTERCEPT_INTR) | INTERCEPT_SVM_MASK;
>      env->intercept_cr_read    = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_read));
>      env->intercept_cr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_write));
>      env->intercept_dr_read    = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_read));
>      env->intercept_dr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_write));
> -    env->intercept_exceptions = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions));
> +    env->intercept_exceptions = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions));
>  
> -    env->gdt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base));
> -    env->gdt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit));
> +    env->gdt.base  = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base));
> +    env->gdt.limit = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit));
>  
> -    env->idt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base));
> -    env->idt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit));
> +    env->idt.base  = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base));
> +    env->idt.limit = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit));
>  
>      /* clear exit_info_2 so we behave like the real hardware */
>      stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 0);
>  
> -    cpu_x86_update_cr0(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr0)));
> -    cpu_x86_update_cr4(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4)));
> -    cpu_x86_update_cr3(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3)));
> -    env->cr[2] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2));
> -    int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
> +    cpu_x86_update_cr0(env, lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr0)));
> +    cpu_x86_update_cr4(env, lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4)));
> +    cpu_x86_update_cr3(env, lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3)));
> +    env->cr[2] = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2));
> +    int_ctl = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
>      if (int_ctl & V_INTR_MASKING_MASK) {
>          env->cr[8] = int_ctl & V_TPR_MASK;
>          if (env->eflags & IF_MASK)
> @@ -4033,13 +4049,13 @@ void helper_vmrun(target_ulong addr)
>      }
>  
>  #ifdef TARGET_X86_64
> -    env->efer = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer));
> +    env->efer = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer));
>      env->hflags &= ~HF_LMA_MASK;
>      if (env->efer & MSR_EFER_LMA)
>         env->hflags |= HF_LMA_MASK;
>  #endif
>      env->eflags = 0;
> -    load_eflags(ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags)),
> +    load_eflags(lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags)),
>                  ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
>      CC_OP = CC_OP_EFLAGS;
>      CC_DST = 0xffffffff;
> @@ -4049,12 +4065,12 @@ void helper_vmrun(target_ulong addr)
>      SVM_LOAD_SEG(env->vm_vmcb, SS, ss);
>      SVM_LOAD_SEG(env->vm_vmcb, DS, ds);
>  
> -    EIP = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rip));
> +    EIP = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rip));
>      env->eip = EIP;
> -    ESP = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rsp));
> -    EAX = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rax));
> -    env->dr[7] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr7));
> -    env->dr[6] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr6));
> +    ESP = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rsp));
> +    EAX = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rax));
> +    env->dr[7] = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr7));
> +    env->dr[6] = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr6));
>      cpu_x86_set_cpl(env, ldub_phys(env->vm_vmcb + offsetof(struct vmcb, save.cpl)));
>  
>      /* FIXME: guest state consistency checks */
> @@ -4073,11 +4089,11 @@ void helper_vmrun(target_ulong addr)
>      regs_to_env();
>  
>      /* maybe we need to inject an event */
> -    event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
> +    event_inj = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
>      if (event_inj & SVM_EVTINJ_VALID) {
>          uint8_t vector = event_inj & SVM_EVTINJ_VEC_MASK;
>          uint16_t valid_err = event_inj & SVM_EVTINJ_VALID_ERR;
> -        uint32_t event_inj_err = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
> +        uint32_t event_inj_err = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
>          stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
>  
>          if (loglevel & CPU_LOG_TB_IN_ASM)
> @@ -4137,7 +4153,7 @@ void helper_vmload(target_ulong addr)
>  {
>      if (loglevel & CPU_LOG_TB_IN_ASM)
>          fprintf(logfile,"vmload! " TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n",
> -                addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)),
> +                addr, lduq_phys(addr + offsetof(struct vmcb, save.fs.base)),
>                  env->segs[R_FS].base);
>  
>      SVM_LOAD_SEG2(addr, segs[R_FS], fs);
> @@ -4146,22 +4162,22 @@ void helper_vmload(target_ulong addr)
>      SVM_LOAD_SEG2(addr, ldt, ldtr);
>  
>  #ifdef TARGET_X86_64
> -    env->kernelgsbase = ldq_phys(addr + offsetof(struct vmcb, save.kernel_gs_base));
> -    env->lstar = ldq_phys(addr + offsetof(struct vmcb, save.lstar));
> -    env->cstar = ldq_phys(addr + offsetof(struct vmcb, save.cstar));
> -    env->fmask = ldq_phys(addr + offsetof(struct vmcb, save.sfmask));
> +    env->kernelgsbase = lduq_phys(addr + offsetof(struct vmcb, save.kernel_gs_base));
> +    env->lstar = lduq_phys(addr + offsetof(struct vmcb, save.lstar));
> +    env->cstar = lduq_phys(addr + offsetof(struct vmcb, save.cstar));
> +    env->fmask = lduq_phys(addr + offsetof(struct vmcb, save.sfmask));
>  #endif
> -    env->star = ldq_phys(addr + offsetof(struct vmcb, save.star));
> -    env->sysenter_cs = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_cs));
> -    env->sysenter_esp = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_esp));
> -    env->sysenter_eip = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_eip));
> +    env->star = lduq_phys(addr + offsetof(struct vmcb, save.star));
> +    env->sysenter_cs = lduq_phys(addr + offsetof(struct vmcb, save.sysenter_cs));
> +    env->sysenter_esp = lduq_phys(addr + offsetof(struct vmcb, save.sysenter_esp));
> +    env->sysenter_eip = lduq_phys(addr + offsetof(struct vmcb, save.sysenter_eip));
>  }
>  
>  void helper_vmsave(target_ulong addr)
>  {
>      if (loglevel & CPU_LOG_TB_IN_ASM)
>          fprintf(logfile,"vmsave! " TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n",
> -                addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)),
> +                addr, lduq_phys(addr + offsetof(struct vmcb, save.fs.base)),
>                  env->segs[R_FS].base);
>  
>      SVM_SAVE_SEG(addr, segs[R_FS], fs);
> @@ -4228,7 +4244,7 @@ int svm_check_intercept_param(uint32_t t
>      case SVM_EXIT_IOIO:
>          if (INTERCEPTED(1ULL << INTERCEPT_IOIO_PROT)) {
>              /* FIXME: this should be read in at vmrun (faster this way?) */
> -            uint64_t addr = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.iopm_base_pa));
> +            uint64_t addr = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, control.iopm_base_pa));
>              uint16_t port = (uint16_t) (param >> 16);
>  
>              if(ldub_phys(addr + port / 8) & (1 << (port % 8)))
> @@ -4239,7 +4255,7 @@ int svm_check_intercept_param(uint32_t t
>      case SVM_EXIT_MSR:
>          if (INTERCEPTED(1ULL << INTERCEPT_MSR_PROT)) {
>              /* FIXME: this should be read in at vmrun (faster this way?) */
> -            uint64_t addr = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.msrpm_base_pa));
> +            uint64_t addr = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, control.msrpm_base_pa));
>              switch((uint32_t)ECX) {
>              case 0 ... 0x1fff:
>                  T0 = (ECX * 2) % 8;
> @@ -4281,7 +4297,7 @@ void vmexit(uint64_t exit_code, uint64_t
>      if (loglevel & CPU_LOG_TB_IN_ASM)
>          fprintf(logfile,"vmexit(%016" PRIx64 ", %016" PRIx64 ", %016" PRIx64 ", " TARGET_FMT_lx ")!\n",
>                  exit_code, exit_info_1,
> -                ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2)),
> +                lduq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2)),
>                  EIP);
>  
>      if(env->hflags & HF_INHIBIT_IRQ_MASK) {
> @@ -4309,7 +4325,7 @@ void vmexit(uint64_t exit_code, uint64_t
>      stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3), env->cr[3]);
>      stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4), env->cr[4]);
>  
> -    if ((int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl))) & V_INTR_MASKING_MASK) {
> +    if ((int_ctl = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl))) & V_INTR_MASKING_MASK) {
>          int_ctl &= ~V_TPR_MASK;
>          int_ctl |= env->cr[8] & V_TPR_MASK;
>          stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl), int_ctl);
> @@ -4329,27 +4345,27 @@ void vmexit(uint64_t exit_code, uint64_t
>      env->intercept_exceptions = 0;
>      env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
>  
> -    env->gdt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base));
> -    env->gdt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit));
> +    env->gdt.base  = lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base));
> +    env->gdt.limit = ldul_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit));
>  
> -    env->idt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base));
> -    env->idt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit));
> +    env->idt.base  = lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base));
> +    env->idt.limit = ldul_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit));
>  
> -    cpu_x86_update_cr0(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0)) | CR0_PE_MASK);
> -    cpu_x86_update_cr4(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4)));
> -    cpu_x86_update_cr3(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3)));
> +    cpu_x86_update_cr0(env, lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0)) | CR0_PE_MASK);
> +    cpu_x86_update_cr4(env, lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4)));
> +    cpu_x86_update_cr3(env, lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3)));
>      if (int_ctl & V_INTR_MASKING_MASK)
> -        env->cr[8] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr8));
> +        env->cr[8] = lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr8));
>      /* we need to set the efer after the crs so the hidden flags get set properly */
>  #ifdef TARGET_X86_64
> -    env->efer  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer));
> +    env->efer  = lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer));
>      env->hflags &= ~HF_LMA_MASK;
>      if (env->efer & MSR_EFER_LMA)
>         env->hflags |= HF_LMA_MASK;
>  #endif
>  
>      env->eflags = 0;
> -    load_eflags(ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags)),
> +    load_eflags(lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags)),
>                  ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
>      CC_OP = CC_OP_EFLAGS;
>  
> @@ -4358,12 +4374,12 @@ void vmexit(uint64_t exit_code, uint64_t
>      SVM_LOAD_SEG(env->vm_hsave, SS, ss);
>      SVM_LOAD_SEG(env->vm_hsave, DS, ds);
>  
> -    EIP = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip));
> -    ESP = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rsp));
> -    EAX = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rax));
> +    EIP = lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip));
> +    ESP = lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.rsp));
> +    EAX = lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.rax));
>  
> -    env->dr[6] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr6));
> -    env->dr[7] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr7));
> +    env->dr[6] = lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr6));
> +    env->dr[7] = lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr7));
>  
>      /* other setups */
>      cpu_x86_set_cpl(env, 0);
> Index: target-i386/helper2.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-i386/helper2.c,v
> retrieving revision 1.53
> diff -u -d -d -p -r1.53 helper2.c
> --- target-i386/helper2.c	14 Oct 2007 07:07:06 -0000	1.53
> +++ target-i386/helper2.c	16 Oct 2007 11:39:06 -0000
> @@ -641,7 +641,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
>  
>              pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
>                  env->a20_mask;
> -            pml4e = ldq_phys(pml4e_addr);
> +            pml4e = lduq_phys(pml4e_addr);
>              if (!(pml4e & PG_PRESENT_MASK)) {
>                  error_code = 0;
>                  goto do_fault;
> @@ -657,7 +657,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
>              ptep = pml4e ^ PG_NX_MASK;
>              pdpe_addr = ((pml4e & PHYS_ADDR_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
>                  env->a20_mask;
> -            pdpe = ldq_phys(pdpe_addr);
> +            pdpe = lduq_phys(pdpe_addr);
>              if (!(pdpe & PG_PRESENT_MASK)) {
>                  error_code = 0;
>                  goto do_fault;
> @@ -677,7 +677,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
>              /* XXX: load them when cr3 is loaded ? */
>              pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) &
>                  env->a20_mask;
> -            pdpe = ldq_phys(pdpe_addr);
> +            pdpe = lduq_phys(pdpe_addr);
>              if (!(pdpe & PG_PRESENT_MASK)) {
>                  error_code = 0;
>                  goto do_fault;
> @@ -687,7 +687,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
>  
>          pde_addr = ((pdpe & PHYS_ADDR_MASK) + (((addr >> 21) & 0x1ff) << 3)) &
>              env->a20_mask;
> -        pde = ldq_phys(pde_addr);
> +        pde = lduq_phys(pde_addr);
>          if (!(pde & PG_PRESENT_MASK)) {
>              error_code = 0;
>              goto do_fault;
> @@ -731,7 +731,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
>              }
>              pte_addr = ((pde & PHYS_ADDR_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
>                  env->a20_mask;
> -            pte = ldq_phys(pte_addr);
> +            pte = lduq_phys(pte_addr);
>              if (!(pte & PG_PRESENT_MASK)) {
>                  error_code = 0;
>                  goto do_fault;
> @@ -772,7 +772,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
>          /* page directory entry */
>          pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) &
>              env->a20_mask;
> -        pde = ldl_phys(pde_addr);
> +        pde = ldul_phys(pde_addr);
>          if (!(pde & PG_PRESENT_MASK)) {
>              error_code = 0;
>              goto do_fault;
> @@ -810,7 +810,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
>              /* page directory entry */
>              pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
>                  env->a20_mask;
> -            pte = ldl_phys(pte_addr);
> +            pte = ldul_phys(pte_addr);
>              if (!(pte & PG_PRESENT_MASK)) {
>                  error_code = 0;
>                  goto do_fault;
> @@ -910,13 +910,13 @@ target_phys_addr_t cpu_get_phys_page_deb
>  
>              pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
>                  env->a20_mask;
> -            pml4e = ldl_phys(pml4e_addr);
> +            pml4e = ldul_phys(pml4e_addr);
>              if (!(pml4e & PG_PRESENT_MASK))
>                  return -1;
>  
>              pdpe_addr = ((pml4e & ~0xfff) + (((addr >> 30) & 0x1ff) << 3)) &
>                  env->a20_mask;
> -            pdpe = ldl_phys(pdpe_addr);
> +            pdpe = ldul_phys(pdpe_addr);
>              if (!(pdpe & PG_PRESENT_MASK))
>                  return -1;
>          } else
> @@ -924,14 +924,14 @@ target_phys_addr_t cpu_get_phys_page_deb
>          {
>              pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) &
>                  env->a20_mask;
> -            pdpe = ldl_phys(pdpe_addr);
> +            pdpe = ldul_phys(pdpe_addr);
>              if (!(pdpe & PG_PRESENT_MASK))
>                  return -1;
>          }
>  
>          pde_addr = ((pdpe & ~0xfff) + (((addr >> 21) & 0x1ff) << 3)) &
>              env->a20_mask;
> -        pde = ldl_phys(pde_addr);
> +        pde = ldul_phys(pde_addr);
>          if (!(pde & PG_PRESENT_MASK)) {
>              return -1;
>          }
> @@ -944,7 +944,7 @@ target_phys_addr_t cpu_get_phys_page_deb
>              pte_addr = ((pde & ~0xfff) + (((addr >> 12) & 0x1ff) << 3)) &
>                  env->a20_mask;
>              page_size = 4096;
> -            pte = ldl_phys(pte_addr);
> +            pte = ldul_phys(pte_addr);
>          }
>      } else {
>          if (!(env->cr[0] & CR0_PG_MASK)) {
> @@ -953,7 +953,7 @@ target_phys_addr_t cpu_get_phys_page_deb
>          } else {
>              /* page directory entry */
>              pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask;
> -            pde = ldl_phys(pde_addr);
> +            pde = ldul_phys(pde_addr);
>              if (!(pde & PG_PRESENT_MASK))
>                  return -1;
>              if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
> @@ -962,7 +962,7 @@ target_phys_addr_t cpu_get_phys_page_deb
>              } else {
>                  /* page directory entry */
>                  pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask;
> -                pte = ldl_phys(pte_addr);
> +                pte = ldul_phys(pte_addr);
>                  if (!(pte & PG_PRESENT_MASK))
>                      return -1;
>                  page_size = 4096;
> Index: target-i386/op.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-i386/op.c,v
> retrieving revision 1.51
> diff -u -d -d -p -r1.51 op.c
> --- target-i386/op.c	23 Sep 2007 15:28:04 -0000	1.51
> +++ target-i386/op.c	16 Oct 2007 11:39:06 -0000
> @@ -716,8 +716,8 @@ void OPPROTO op_boundw(void)
>  void OPPROTO op_boundl(void)
>  {
>      int low, high, v;
> -    low = ldl(A0);
> -    high = ldl(A0 + 4);
> +    low = ldul(A0);
> +    high = ldul(A0 + 4);
>      v = T0;
>      if (v < low || v > high) {
>          raise_exception(EXCP05_BOUND);
> @@ -747,8 +747,6 @@ void OPPROTO op_exit_tb(void)
>  
>  /* multiple size ops */
>  
> -#define ldul ldl
> -
>  #define SHIFT 0
>  #include "ops_template.h"
>  #undef SHIFT
> @@ -1688,7 +1686,7 @@ CCTable cc_table[CC_OP_NB] = {
>  void OPPROTO op_flds_FT0_A0(void)
>  {
>  #ifdef USE_FP_CONVERT
> -    FP_CONVERT.i32 = ldl(A0);
> +    FP_CONVERT.i32 = ldul(A0);
>      FT0 = FP_CONVERT.f;
>  #else
>      FT0 = ldfl(A0);
> @@ -1698,7 +1696,7 @@ void OPPROTO op_flds_FT0_A0(void)
>  void OPPROTO op_fldl_FT0_A0(void)
>  {
>  #ifdef USE_FP_CONVERT
> -    FP_CONVERT.i64 = ldq(A0);
> +    FP_CONVERT.i64 = lduq(A0);
>      FT0 = FP_CONVERT.d;
>  #else
>      FT0 = ldfq(A0);
> @@ -1715,12 +1713,12 @@ void helper_fild_FT0_A0(void)
>  
>  void helper_fildl_FT0_A0(void)
>  {
> -    FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
> +    FT0 = (CPU86_LDouble)((int32_t)ldul(A0));
>  }
>  
>  void helper_fildll_FT0_A0(void)
>  {
> -    FT0 = (CPU86_LDouble)((int64_t)ldq(A0));
> +    FT0 = (CPU86_LDouble)((int64_t)lduq(A0));
>  }
>  
>  void OPPROTO op_fild_FT0_A0(void)
> @@ -1753,20 +1751,20 @@ void OPPROTO op_fild_FT0_A0(void)
>  void OPPROTO op_fildl_FT0_A0(void)
>  {
>  #ifdef USE_FP_CONVERT
> -    FP_CONVERT.i32 = (int32_t) ldl(A0);
> +    FP_CONVERT.i32 = (int32_t) ldul(A0);
>      FT0 = (CPU86_LDouble)FP_CONVERT.i32;
>  #else
> -    FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
> +    FT0 = (CPU86_LDouble)((int32_t)ldul(A0));
>  #endif
>  }
>  
>  void OPPROTO op_fildll_FT0_A0(void)
>  {
>  #ifdef USE_FP_CONVERT
> -    FP_CONVERT.i64 = (int64_t) ldq(A0);
> +    FP_CONVERT.i64 = (int64_t) lduq(A0);
>      FT0 = (CPU86_LDouble)FP_CONVERT.i64;
>  #else
> -    FT0 = (CPU86_LDouble)((int64_t)ldq(A0));
> +    FT0 = (CPU86_LDouble)((int64_t)lduq(A0));
>  #endif
>  }
>  #endif
> @@ -1778,7 +1776,7 @@ void OPPROTO op_flds_ST0_A0(void)
>      int new_fpstt;
>      new_fpstt = (env->fpstt - 1) & 7;
>  #ifdef USE_FP_CONVERT
> -    FP_CONVERT.i32 = ldl(A0);
> +    FP_CONVERT.i32 = ldul(A0);
>      env->fpregs[new_fpstt].d = FP_CONVERT.f;
>  #else
>      env->fpregs[new_fpstt].d = ldfl(A0);
> @@ -1792,7 +1790,7 @@ void OPPROTO op_fldl_ST0_A0(void)
>      int new_fpstt;
>      new_fpstt = (env->fpstt - 1) & 7;
>  #ifdef USE_FP_CONVERT
> -    FP_CONVERT.i64 = ldq(A0);
> +    FP_CONVERT.i64 = lduq(A0);
>      env->fpregs[new_fpstt].d = FP_CONVERT.d;
>  #else
>      env->fpregs[new_fpstt].d = ldfq(A0);
> @@ -1822,7 +1820,7 @@ void helper_fildl_ST0_A0(void)
>  {
>      int new_fpstt;
>      new_fpstt = (env->fpstt - 1) & 7;
> -    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0));
> +    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldul(A0));
>      env->fpstt = new_fpstt;
>      env->fptags[new_fpstt] = 0; /* validate stack entry */
>  }
> @@ -1831,7 +1829,7 @@ void helper_fildll_ST0_A0(void)
>  {
>      int new_fpstt;
>      new_fpstt = (env->fpstt - 1) & 7;
> -    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int64_t)ldq(A0));
> +    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int64_t)lduq(A0));
>      env->fpstt = new_fpstt;
>      env->fptags[new_fpstt] = 0; /* validate stack entry */
>  }
> @@ -1872,10 +1870,10 @@ void OPPROTO op_fildl_ST0_A0(void)
>      int new_fpstt;
>      new_fpstt = (env->fpstt - 1) & 7;
>  #ifdef USE_FP_CONVERT
> -    FP_CONVERT.i32 = (int32_t) ldl(A0);
> +    FP_CONVERT.i32 = (int32_t) ldul(A0);
>      env->fpregs[new_fpstt].d = (CPU86_LDouble)FP_CONVERT.i32;
>  #else
> -    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0));
> +    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldul(A0));
>  #endif
>      env->fpstt = new_fpstt;
>      env->fptags[new_fpstt] = 0; /* validate stack entry */
> @@ -1886,10 +1884,10 @@ void OPPROTO op_fildll_ST0_A0(void)
>      int new_fpstt;
>      new_fpstt = (env->fpstt - 1) & 7;
>  #ifdef USE_FP_CONVERT
> -    FP_CONVERT.i64 = (int64_t) ldq(A0);
> +    FP_CONVERT.i64 = (int64_t) lduq(A0);
>      env->fpregs[new_fpstt].d = (CPU86_LDouble)FP_CONVERT.i64;
>  #else
> -    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int64_t)ldq(A0));
> +    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int64_t)lduq(A0));
>  #endif
>      env->fpstt = new_fpstt;
>      env->fptags[new_fpstt] = 0; /* validate stack entry */
> Index: target-i386/ops_mem.h
> ===================================================================
> RCS file: /sources/qemu/qemu/target-i386/ops_mem.h,v
> retrieving revision 1.7
> diff -u -d -d -p -r1.7 ops_mem.h
> --- target-i386/ops_mem.h	28 Nov 2005 21:02:17 -0000	1.7
> +++ target-i386/ops_mem.h	16 Oct 2007 11:39:06 -0000
> @@ -20,7 +20,7 @@ void OPPROTO glue(glue(op_ldsw, MEMSUFFI
>  
>  void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T0_A0)(void)
>  {
> -    T0 = (uint32_t)glue(ldl, MEMSUFFIX)(A0);
> +    T0 = (uint32_t)glue(ldul, MEMSUFFIX)(A0);
>  }
>  
>  void OPPROTO glue(glue(op_ldub, MEMSUFFIX), _T1_A0)(void)
> @@ -45,7 +45,7 @@ void OPPROTO glue(glue(op_ldsw, MEMSUFFI
>  
>  void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T1_A0)(void)
>  {
> -    T1 = (uint32_t)glue(ldl, MEMSUFFIX)(A0);
> +    T1 = (uint32_t)glue(ldul, MEMSUFFIX)(A0);
>  }
>  
>  void OPPROTO glue(glue(op_stb, MEMSUFFIX), _T0_A0)(void)
> @@ -91,7 +91,7 @@ void OPPROTO glue(glue(op_ldq, MEMSUFFIX
>  {
>      uint64_t *p;
>      p = (uint64_t *)((char *)env + PARAM1);
> -    *p = glue(ldq, MEMSUFFIX)(A0);
> +    *p = glue(lduq, MEMSUFFIX)(A0);
>  }
>  
>  void OPPROTO glue(glue(op_stq, MEMSUFFIX), _env_A0)(void)
> @@ -106,8 +106,8 @@ void OPPROTO glue(glue(op_ldo, MEMSUFFIX
>  {
>      XMMReg *p;
>      p = (XMMReg *)((char *)env + PARAM1);
> -    p->XMM_Q(0) = glue(ldq, MEMSUFFIX)(A0);
> -    p->XMM_Q(1) = glue(ldq, MEMSUFFIX)(A0 + 8);
> +    p->XMM_Q(0) = glue(lduq, MEMSUFFIX)(A0);
> +    p->XMM_Q(1) = glue(lduq, MEMSUFFIX)(A0 + 8);
>  }
>  
>  void OPPROTO glue(glue(op_sto, MEMSUFFIX), _env_A0)(void)
> @@ -122,22 +122,22 @@ void OPPROTO glue(glue(op_sto, MEMSUFFIX
>  #ifdef TARGET_X86_64
>  void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T0_A0)(void)
>  {
> -    T0 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
> +    T0 = glue(ldsl, MEMSUFFIX)(A0);
>  }
>  
>  void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T1_A0)(void)
>  {
> -    T1 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
> +    T1 = glue(ldsl, MEMSUFFIX)(A0);
>  }
>  
>  void OPPROTO glue(glue(op_ldq, MEMSUFFIX), _T0_A0)(void)
>  {
> -    T0 = glue(ldq, MEMSUFFIX)(A0);
> +    T0 = glue(lduq, MEMSUFFIX)(A0);
>  }
>  
>  void OPPROTO glue(glue(op_ldq, MEMSUFFIX), _T1_A0)(void)
>  {
> -    T1 = glue(ldq, MEMSUFFIX)(A0);
> +    T1 = glue(lduq, MEMSUFFIX)(A0);
>  }
>  
>  void OPPROTO glue(glue(op_stq, MEMSUFFIX), _T0_A0)(void)
> Index: target-i386/svm.h
> ===================================================================
> RCS file: /sources/qemu/qemu/target-i386/svm.h,v
> retrieving revision 1.1
> diff -u -d -d -p -r1.1 svm.h
> --- target-i386/svm.h	23 Sep 2007 15:30:28 -0000	1.1
> +++ target-i386/svm.h	16 Oct 2007 11:39:06 -0000
> @@ -339,14 +339,14 @@ static inline int svm_check_intercept(un
>      cpu_x86_load_seg_cache(env, \
>                      R_##seg_index, \
>                      lduw_phys(addr + offsetof(struct vmcb, save.seg.selector)),\
> -                    ldq_phys(addr + offsetof(struct vmcb, save.seg.base)),\
> -                    ldl_phys(addr + offsetof(struct vmcb, save.seg.limit)),\
> -                    vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg.attrib)), ldq_phys(addr + offsetof(struct vmcb, save.seg.base)), ldl_phys(addr + offsetof(struct vmcb, save.seg.limit))))
> +                    lduq_phys(addr + offsetof(struct vmcb, save.seg.base)),\
> +                    ldul_phys(addr + offsetof(struct vmcb, save.seg.limit)),\
> +                    vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg.attrib)), lduq_phys(addr + offsetof(struct vmcb, save.seg.base)), ldul_phys(addr + offsetof(struct vmcb, save.seg.limit))))
>  
>  #define SVM_LOAD_SEG2(addr, seg_qemu, seg_vmcb) \
>      env->seg_qemu.selector  = lduw_phys(addr + offsetof(struct vmcb, save.seg_vmcb.selector)); \
> -    env->seg_qemu.base      = ldq_phys(addr + offsetof(struct vmcb, save.seg_vmcb.base)); \
> -    env->seg_qemu.limit     = ldl_phys(addr + offsetof(struct vmcb, save.seg_vmcb.limit)); \
> +    env->seg_qemu.base      = lduq_phys(addr + offsetof(struct vmcb, save.seg_vmcb.base)); \
> +    env->seg_qemu.limit     = ldul_phys(addr + offsetof(struct vmcb, save.seg_vmcb.limit)); \
>      env->seg_qemu.flags     = vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg_vmcb.attrib)), env->seg_qemu.base, env->seg_qemu.limit)
>  
>  #define SVM_SAVE_SEG(addr, seg_qemu, seg_vmcb) \
> Index: target-i386/translate-copy.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-i386/translate-copy.c,v
> retrieving revision 1.9
> diff -u -d -d -p -r1.9 translate-copy.c
> --- target-i386/translate-copy.c	17 Sep 2007 08:09:52 -0000	1.9
> +++ target-i386/translate-copy.c	16 Oct 2007 11:39:06 -0000
> @@ -207,7 +207,7 @@ static inline void gen_lea_modrm(DisasCo
>          case 0:
>              if (base == 5) {
>                  base = -1;
> -                disp = ldl_code(s->pc);
> +                disp = ldul_code(s->pc);
>                  s->pc += 4;
>              } else {
>                  disp = 0;
> @@ -218,7 +218,7 @@ static inline void gen_lea_modrm(DisasCo
>              break;
>          default:
>          case 2:
> -            disp = ldl_code(s->pc);
> +            disp = ldul_code(s->pc);
>              s->pc += 4;
>              break;
>          }
> @@ -266,7 +266,7 @@ static inline uint32_t insn_get(DisasCon
>          break;
>      default:
>      case OT_LONG:
> -        ret = ldl_code(s->pc);
> +        ret = ldul_code(s->pc);
>          s->pc += 4;
>          break;
>      }
> Index: target-i386/translate.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-i386/translate.c,v
> retrieving revision 1.72
> diff -u -d -d -p -r1.72 translate.c
> --- target-i386/translate.c	27 Sep 2007 01:52:00 -0000	1.72
> +++ target-i386/translate.c	16 Oct 2007 11:39:07 -0000
> @@ -1462,7 +1462,7 @@ static void gen_lea_modrm(DisasContext *
>          case 0:
>              if ((base & 7) == 5) {
>                  base = -1;
> -                disp = (int32_t)ldl_code(s->pc);
> +                disp = (int32_t)ldul_code(s->pc);
>                  s->pc += 4;
>                  if (CODE64(s) && !havesib) {
>                      disp += s->pc + s->rip_offset;
> @@ -1476,7 +1476,7 @@ static void gen_lea_modrm(DisasContext *
>              break;
>          default:
>          case 2:
> -            disp = ldl_code(s->pc);
> +            disp = ldul_code(s->pc);
>              s->pc += 4;
>              break;
>          }
> @@ -1736,7 +1736,7 @@ static inline uint32_t insn_get(DisasCon
>          break;
>      default:
>      case OT_LONG:
> -        ret = ldl_code(s->pc);
> +        ret = ldul_code(s->pc);
>          s->pc += 4;
>          break;
>      }
> @@ -4190,7 +4190,7 @@ static target_ulong disas_insn(DisasCont
>                  ot = dflag + OT_WORD;
>  #ifdef TARGET_X86_64
>              if (s->aflag == 2) {
> -                offset_addr = ldq_code(s->pc);
> +                offset_addr = lduq_code(s->pc);
>                  s->pc += 8;
>                  if (offset_addr == (int32_t)offset_addr)
>                      gen_op_movq_A0_im(offset_addr);
> @@ -4243,7 +4243,7 @@ static target_ulong disas_insn(DisasCont
>          if (dflag == 2) {
>              uint64_t tmp;
>              /* 64 bit case */
> -            tmp = ldq_code(s->pc);
> +            tmp = lduq_code(s->pc);
>              s->pc += 8;
>              reg = (b & 7) | REX_B(s);
>              gen_movtl_T0_im(tmp);
> Index: target-m68k/exec.h
> ===================================================================
> RCS file: /sources/qemu/qemu/target-m68k/exec.h,v
> retrieving revision 1.5
> diff -u -d -d -p -r1.5 exec.h
> --- target-m68k/exec.h	14 Oct 2007 07:07:06 -0000	1.5
> +++ target-m68k/exec.h	16 Oct 2007 11:39:07 -0000
> @@ -42,6 +42,9 @@ int cpu_m68k_handle_mmu_fault (CPUState 
>  
>  #if !defined(CONFIG_USER_ONLY)
>  #include "softmmu_exec.h"
> +#define REVERSE_ENDIAN
> +#include "softmmu_exec.h"
> +#undef REVERSE_ENDIAN
>  #endif
>  
>  void cpu_m68k_flush_flags(CPUM68KState *env, int cc_op);
> Index: target-m68k/op_helper.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-m68k/op_helper.c,v
> retrieving revision 1.7
> diff -u -d -d -p -r1.7 op_helper.c
> --- target-m68k/op_helper.c	14 Oct 2007 07:07:06 -0000	1.7
> +++ target-m68k/op_helper.c	16 Oct 2007 11:39:07 -0000
> @@ -33,6 +33,21 @@ extern int semihosting_enabled;
>  #define MMUSUFFIX _mmu
>  #define GETPC() (__builtin_return_address(0))
>  
> +/* Native-endian */
> +#define SHIFT 0
> +#include "softmmu_template.h"
> +
> +#define SHIFT 1
> +#include "softmmu_template.h"
> +
> +#define SHIFT 2
> +#include "softmmu_template.h"
> +
> +#define SHIFT 3
> +#include "softmmu_template.h"
> +
> +/* Reverse-endian */
> +#define REVERSE_ENDIAN
>  #define SHIFT 0
>  #include "softmmu_template.h"
>  
> @@ -44,6 +59,7 @@ extern int semihosting_enabled;
>  
>  #define SHIFT 3
>  #include "softmmu_template.h"
> +#undef REVERSE_ENDIAN
>  
>  /* Try to fill the TLB and return an exception if error. If retaddr is
>     NULL, it means that the function was called in C code (i.e. not
> @@ -83,8 +99,8 @@ static void do_rte(void)
>      uint32_t fmt;
>  
>      sp = env->aregs[7];
> -    fmt = ldl_kernel(sp);
> -    env->pc = ldl_kernel(sp + 4);
> +    fmt = ldul_kernel(sp);
> +    env->pc = ldul_kernel(sp + 4);
>      sp |= (fmt >> 28) & 3;
>      env->sr = fmt & 0xffff;
>      m68k_switch_sp(env);
> @@ -112,7 +128,7 @@ void do_interrupt(int is_hw)
>                      && (env->sr & SR_S) != 0
>                      && (env->pc & 3) == 0
>                      && lduw_code(env->pc - 4) == 0x4e71
> -                    && ldl_code(env->pc) == 0x4e7bf000) {
> +                    && ldul_code(env->pc) == 0x4e7bf000) {
>                  env->pc += 4;
>                  do_m68k_semihosting(env, env->dregs[0]);
>                  return;
> @@ -153,7 +169,7 @@ void do_interrupt(int is_hw)
>      stl_kernel(sp, fmt);
>      env->aregs[7] = sp;
>      /* Jump to vector.  */
> -    env->pc = ldl_kernel(env->vbr + vector);
> +    env->pc = ldul_kernel(env->vbr + vector);
>  }
>  
>  #endif
> Index: target-m68k/op_mem.h
> ===================================================================
> RCS file: /sources/qemu/qemu/target-m68k/op_mem.h,v
> retrieving revision 1.1
> diff -u -d -d -p -r1.1 op_mem.h
> --- target-m68k/op_mem.h	23 May 2007 19:58:11 -0000	1.1
> +++ target-m68k/op_mem.h	16 Oct 2007 11:39:07 -0000
> @@ -11,7 +11,7 @@ MEM_LD_OP(8u32,ub)
>  MEM_LD_OP(8s32,sb)
>  MEM_LD_OP(16u32,uw)
>  MEM_LD_OP(16s32,sw)
> -MEM_LD_OP(32,l)
> +MEM_LD_OP(32,ul)
>  
>  #undef MEM_LD_OP
>  
> Index: target-mips/exec.h
> ===================================================================
> RCS file: /sources/qemu/qemu/target-mips/exec.h,v
> retrieving revision 1.39
> diff -u -d -d -p -r1.39 exec.h
> --- target-mips/exec.h	14 Oct 2007 07:07:07 -0000	1.39
> +++ target-mips/exec.h	16 Oct 2007 11:39:07 -0000
> @@ -54,6 +54,9 @@ register target_ulong T2 asm(AREG3);
>  
>  #if !defined(CONFIG_USER_ONLY)
>  #include "softmmu_exec.h"
> +#define REVERSE_ENDIAN
> +#include "softmmu_exec.h"
> +#undef REVERSE_ENDIAN
>  #endif /* !defined(CONFIG_USER_ONLY) */
>  
>  #if defined(TARGET_MIPSN32) || defined(TARGET_MIPS64)
> Index: target-mips/op_helper.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-mips/op_helper.c,v
> retrieving revision 1.66
> diff -u -d -d -p -r1.66 op_helper.c
> --- target-mips/op_helper.c	14 Oct 2007 07:07:07 -0000	1.66
> +++ target-mips/op_helper.c	16 Oct 2007 11:39:07 -0000
> @@ -544,6 +544,21 @@ static void do_unaligned_access (target_
>  #define MMUSUFFIX _mmu
>  #define ALIGNED_ONLY
>  
> +/* Native-endian */
> +#define SHIFT 0
> +#include "softmmu_template.h"
> +
> +#define SHIFT 1
> +#include "softmmu_template.h"
> +
> +#define SHIFT 2
> +#include "softmmu_template.h"
> +
> +#define SHIFT 3
> +#include "softmmu_template.h"
> +
> +/* Reverse-endian */
> +#define REVERSE_ENDIAN
>  #define SHIFT 0
>  #include "softmmu_template.h"
>  
> @@ -555,6 +570,7 @@ static void do_unaligned_access (target_
>  
>  #define SHIFT 3
>  #include "softmmu_template.h"
> +#undef REVERSE_ENDIAN
>  
>  static void do_unaligned_access (target_ulong addr, int is_write, int is_user, void *retaddr)
>  {
> Index: target-mips/op_mem.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-mips/op_mem.c,v
> retrieving revision 1.14
> diff -u -d -d -p -r1.14 op_mem.c
> --- target-mips/op_mem.c	9 Oct 2007 03:12:08 -0000	1.14
> +++ target-mips/op_mem.c	16 Oct 2007 11:39:07 -0000
> @@ -57,13 +57,13 @@ void glue(op_sh, MEMSUFFIX) (void)
>  
>  void glue(op_lw, MEMSUFFIX) (void)
>  {
> -    T0 = glue(ldl, MEMSUFFIX)(T0);
> +    T0 = glue(ldul, MEMSUFFIX)(T0);
>      RETURN();
>  }
>  
>  void glue(op_lwu, MEMSUFFIX) (void)
>  {
> -    T0 = (uint32_t)glue(ldl, MEMSUFFIX)(T0);
> +    T0 = (uint32_t)glue(ldul, MEMSUFFIX)(T0);
>      RETURN();
>  }
>  
> @@ -167,7 +167,7 @@ void glue(op_swr, MEMSUFFIX) (void)
>  void glue(op_ll, MEMSUFFIX) (void)
>  {
>      T1 = T0;
> -    T0 = glue(ldl, MEMSUFFIX)(T0);
> +    T0 = glue(ldul, MEMSUFFIX)(T0);
>      env->CP0_LLAddr = T1;
>      RETURN();
>  }
> @@ -191,7 +191,7 @@ void glue(op_sc, MEMSUFFIX) (void)
>  #if defined(TARGET_MIPSN32) || defined(TARGET_MIPS64)
>  void glue(op_ld, MEMSUFFIX) (void)
>  {
> -    T0 = glue(ldq, MEMSUFFIX)(T0);
> +    T0 = glue(lduq, MEMSUFFIX)(T0);
>      RETURN();
>  }
>  
> @@ -359,7 +359,7 @@ void glue(op_sdr, MEMSUFFIX) (void)
>  void glue(op_lld, MEMSUFFIX) (void)
>  {
>      T1 = T0;
> -    T0 = glue(ldq, MEMSUFFIX)(T0);
> +    T0 = glue(lduq, MEMSUFFIX)(T0);
>      env->CP0_LLAddr = T1;
>      RETURN();
>  }
> @@ -383,7 +383,7 @@ void glue(op_scd, MEMSUFFIX) (void)
>  
>  void glue(op_lwc1, MEMSUFFIX) (void)
>  {
> -    WT0 = glue(ldl, MEMSUFFIX)(T0);
> +    WT0 = glue(ldul, MEMSUFFIX)(T0);
>      RETURN();
>  }
>  void glue(op_swc1, MEMSUFFIX) (void)
> @@ -393,7 +393,7 @@ void glue(op_swc1, MEMSUFFIX) (void)
>  }
>  void glue(op_ldc1, MEMSUFFIX) (void)
>  {
> -    DT0 = glue(ldq, MEMSUFFIX)(T0);
> +    DT0 = glue(lduq, MEMSUFFIX)(T0);
>      RETURN();
>  }
>  void glue(op_sdc1, MEMSUFFIX) (void)
> @@ -403,7 +403,7 @@ void glue(op_sdc1, MEMSUFFIX) (void)
>  }
>  void glue(op_luxc1, MEMSUFFIX) (void)
>  {
> -    DT0 = glue(ldq, MEMSUFFIX)(T0 & ~0x7);
> +    DT0 = glue(lduq, MEMSUFFIX)(T0 & ~0x7);
>      RETURN();
>  }
>  void glue(op_suxc1, MEMSUFFIX) (void)
> Index: target-mips/translate.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-mips/translate.c,v
> retrieving revision 1.106
> diff -u -d -d -p -r1.106 translate.c
> --- target-mips/translate.c	9 Oct 2007 03:39:58 -0000	1.106
> +++ target-mips/translate.c	16 Oct 2007 11:39:07 -0000
> @@ -6544,7 +6544,7 @@ gen_intermediate_code_internal (CPUState
>              gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK;
>              gen_opc_instr_start[lj] = 1;
>          }
> -        ctx.opcode = ldl_code(ctx.pc);
> +        ctx.opcode = ldul_code(ctx.pc);
>          decode_opc(env, &ctx);
>          ctx.pc += 4;
>  
> Index: target-ppc/exec.h
> ===================================================================
> RCS file: /sources/qemu/qemu/target-ppc/exec.h,v
> retrieving revision 1.29
> diff -u -d -d -p -r1.29 exec.h
> --- target-ppc/exec.h	14 Oct 2007 07:07:07 -0000	1.29
> +++ target-ppc/exec.h	16 Oct 2007 11:39:07 -0000
> @@ -91,7 +91,12 @@ static always_inline target_ulong rotl64
>  #endif
>  
>  #if !defined(CONFIG_USER_ONLY)
> +
> +#include "softmmu_exec.h"
> +#define REVERSE_ENDIAN
>  #include "softmmu_exec.h"
> +#undef REVERSE_ENDIAN
> +
>  #endif /* !defined(CONFIG_USER_ONLY) */
>  
>  void do_raise_exception_err (uint32_t exception, int error_code);
> Index: target-ppc/helper.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-ppc/helper.c,v
> retrieving revision 1.80
> diff -u -d -d -p -r1.80 helper.c
> --- target-ppc/helper.c	14 Oct 2007 10:21:20 -0000	1.80
> +++ target-ppc/helper.c	16 Oct 2007 11:39:07 -0000
> @@ -554,8 +554,8 @@ static always_inline int _find_pte (mmu_
>      for (i = 0; i < 8; i++) {
>  #if defined(TARGET_PPC64)
>          if (is_64b) {
> -            pte0 = ldq_phys(base + (i * 16));
> -            pte1 =  ldq_phys(base + (i * 16) + 8);
> +            pte0 = lduq_phys(base + (i * 16));
> +            pte1 =  lduq_phys(base + (i * 16) + 8);
>              r = pte64_check(ctx, pte0, pte1, h, rw, type);
>  #if defined (DEBUG_MMU)
>              if (loglevel != 0) {
> @@ -569,8 +569,8 @@ static always_inline int _find_pte (mmu_
>          } else
>  #endif
>          {
> -            pte0 = ldl_phys(base + (i * 8));
> -            pte1 =  ldl_phys(base + (i * 8) + 4);
> +            pte0 = ldul_phys(base + (i * 8));
> +            pte1 =  ldul_phys(base + (i * 8) + 4);
>              r = pte32_check(ctx, pte0, pte1, h, rw, type);
>  #if defined (DEBUG_MMU)
>              if (loglevel != 0) {
> @@ -685,8 +685,8 @@ static int slb_lookup (CPUPPCState *env,
>  #endif
>      mask = 0x0000000000000000ULL; /* Avoid gcc warning */
>      for (n = 0; n < env->slb_nr; n++) {
> -        tmp64 = ldq_phys(sr_base);
> -        tmp = ldl_phys(sr_base + 8);
> +        tmp64 = lduq_phys(sr_base);
> +        tmp = ldul_phys(sr_base + 8);
>  #if defined(DEBUG_SLB)
>          if (loglevel != 0) {
>              fprintf(logfile, "%s: seg %d " PADDRX " %016" PRIx64 " %08"
> @@ -733,7 +733,7 @@ void ppc_slb_invalidate_all (CPUPPCState
>      do_invalidate = 0;
>      sr_base = env->spr[SPR_ASR];
>      for (n = 0; n < env->slb_nr; n++) {
> -        tmp64 = ldq_phys(sr_base);
> +        tmp64 = lduq_phys(sr_base);
>          if (slb_is_valid(tmp64)) {
>              slb_invalidate(&tmp64);
>              stq_phys(sr_base, tmp64);
> @@ -761,7 +761,7 @@ void ppc_slb_invalidate_one (CPUPPCState
>      if (n >= 0) {
>          sr_base = env->spr[SPR_ASR];
>          sr_base += 12 * n;
> -        tmp64 = ldq_phys(sr_base);
> +        tmp64 = lduq_phys(sr_base);
>          if (slb_is_valid(tmp64)) {
>              slb_invalidate(&tmp64);
>              stq_phys(sr_base, tmp64);
> @@ -783,8 +783,8 @@ target_ulong ppc_load_slb (CPUPPCState *
>  
>      sr_base = env->spr[SPR_ASR];
>      sr_base += 12 * slb_nr;
> -    tmp64 = ldq_phys(sr_base);
> -    tmp = ldl_phys(sr_base + 8);
> +    tmp64 = lduq_phys(sr_base);
> +    tmp = ldul_phys(sr_base + 8);
>      if (tmp64 & 0x0000000008000000ULL) {
>          /* SLB entry is valid */
>          /* Copy SLB bits 62:88 to Rt 37:63 (VSID 23:49) */
> @@ -990,10 +990,10 @@ static int get_segment (CPUState *env, m
>                          sdr, mask + 0x80);
>                  for (curaddr = sdr; curaddr < (sdr + mask + 0x80);
>                       curaddr += 16) {
> -                    a0 = ldl_phys(curaddr);
> -                    a1 = ldl_phys(curaddr + 4);
> -                    a2 = ldl_phys(curaddr + 8);
> -                    a3 = ldl_phys(curaddr + 12);
> +                    a0 = ldul_phys(curaddr);
> +                    a1 = ldul_phys(curaddr + 4);
> +                    a2 = ldul_phys(curaddr + 8);
> +                    a3 = ldul_phys(curaddr + 12);
>                      if (a0 != 0 || a1 != 0 || a2 != 0 || a3 != 0) {
>                          fprintf(logfile,
>                                  PADDRX ": %08x %08x %08x %08x\n",
> @@ -2266,7 +2266,7 @@ static always_inline void powerpc_excp (
>  #endif
>          /* XXX: this is false */
>          /* Get rS/rD and rA from faulting opcode */
> -        env->spr[SPR_DSISR] |= (ldl_code((env->nip - 4)) & 0x03FF0000) >> 16;
> +        env->spr[SPR_DSISR] |= (ldul_code((env->nip - 4)) & 0x03FF0000) >> 16;
>          goto store_current;
>      case POWERPC_EXCP_PROGRAM:   /* Program exception                        */
>          switch (env->error_code & ~0xF) {
> Index: target-ppc/op_helper.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-ppc/op_helper.c,v
> retrieving revision 1.51
> diff -u -d -d -p -r1.51 op_helper.c
> --- target-ppc/op_helper.c	14 Oct 2007 08:27:14 -0000	1.51
> +++ target-ppc/op_helper.c	16 Oct 2007 11:39:08 -0000
> @@ -2296,6 +2296,21 @@ DO_SPE_OP1(fsctuf);
>  #define MMUSUFFIX _mmu
>  #define GETPC() (__builtin_return_address(0))
>  
> +/* Native-endian */
> +#define SHIFT 0
> +#include "softmmu_template.h"
> +
> +#define SHIFT 1
> +#include "softmmu_template.h"
> +
> +#define SHIFT 2
> +#include "softmmu_template.h"
> +
> +#define SHIFT 3
> +#include "softmmu_template.h"
> +
> +/* Reverse-endian */
> +#define REVERSE_ENDIAN
>  #define SHIFT 0
>  #include "softmmu_template.h"
>  
> @@ -2307,6 +2322,7 @@ DO_SPE_OP1(fsctuf);
>  
>  #define SHIFT 3
>  #include "softmmu_template.h"
> +#undef REVERSE_ENDIAN
>  
>  /* try to fill the TLB and return an exception if error. If retaddr is
>     NULL, it means that the function was called in C code (i.e. not
> Index: target-ppc/op_helper.h
> ===================================================================
> RCS file: /sources/qemu/qemu/target-ppc/op_helper.h,v
> retrieving revision 1.21
> diff -u -d -d -p -r1.21 op_helper.h
> --- target-ppc/op_helper.h	7 Oct 2007 17:13:44 -0000	1.21
> +++ target-ppc/op_helper.h	16 Oct 2007 11:39:08 -0000
> @@ -37,19 +37,6 @@ void glue(do_POWER2_lfq_le, MEMSUFFIX) (
>  void glue(do_POWER2_stfq, MEMSUFFIX) (void);
>  void glue(do_POWER2_stfq_le, MEMSUFFIX) (void);
>  
> -#if defined(TARGET_PPC64)
> -void glue(do_lsw_64, MEMSUFFIX) (int dst);
> -void glue(do_lsw_le_64, MEMSUFFIX) (int dst);
> -void glue(do_stsw_64, MEMSUFFIX) (int src);
> -void glue(do_stsw_le_64, MEMSUFFIX) (int src);
> -void glue(do_lmw_64, MEMSUFFIX) (int dst);
> -void glue(do_lmw_le_64, MEMSUFFIX) (int dst);
> -void glue(do_stmw_64, MEMSUFFIX) (int src);
> -void glue(do_stmw_le_64, MEMSUFFIX) (int src);
> -void glue(do_icbi_64, MEMSUFFIX) (void);
> -void glue(do_dcbz_64, MEMSUFFIX) (void);
> -#endif
> -
>  #else
>  
>  void do_print_mem_EA (target_ulong EA);
> Index: target-ppc/op_helper_mem.h
> ===================================================================
> RCS file: /sources/qemu/qemu/target-ppc/op_helper_mem.h,v
> retrieving revision 1.14
> diff -u -d -d -p -r1.14 op_helper_mem.h
> --- target-ppc/op_helper_mem.h	7 Oct 2007 17:13:44 -0000	1.14
> +++ target-ppc/op_helper_mem.h	16 Oct 2007 11:39:08 -0000
> @@ -19,85 +19,33 @@
>   */
>  
>  /* Multiple word / string load and store */
> -static always_inline target_ulong glue(ld32r, MEMSUFFIX) (target_ulong EA)
> -{
> -    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
> -    return ((tmp & 0xFF000000UL) >> 24) | ((tmp & 0x00FF0000UL) >> 8) |
> -        ((tmp & 0x0000FF00UL) << 8) | ((tmp & 0x000000FFUL) << 24);
> -}
> -
> -static always_inline void glue(st32r, MEMSUFFIX) (target_ulong EA,
> -                                                  target_ulong data)
> -{
> -    uint32_t tmp =
> -        ((data & 0xFF000000UL) >> 24) | ((data & 0x00FF0000UL) >> 8) |
> -        ((data & 0x0000FF00UL) << 8) | ((data & 0x000000FFUL) << 24);
> -    glue(stl, MEMSUFFIX)(EA, tmp);
> -}
> -
>  void glue(do_lmw, MEMSUFFIX) (int dst)
>  {
>      for (; dst < 32; dst++, T0 += 4) {
> -        env->gpr[dst] = glue(ldl, MEMSUFFIX)((uint32_t)T0);
> -    }
> -}
> -
> -#if defined(TARGET_PPC64)
> -void glue(do_lmw_64, MEMSUFFIX) (int dst)
> -{
> -    for (; dst < 32; dst++, T0 += 4) {
> -        env->gpr[dst] = glue(ldl, MEMSUFFIX)((uint64_t)T0);
> +        env->gpr[dst] = glue(ldul, MEMSUFFIX)(T0);
>      }
>  }
> -#endif
>  
>  void glue(do_stmw, MEMSUFFIX) (int src)
>  {
>      for (; src < 32; src++, T0 += 4) {
> -        glue(stl, MEMSUFFIX)((uint32_t)T0, env->gpr[src]);
> -    }
> -}
> -
> -#if defined(TARGET_PPC64)
> -void glue(do_stmw_64, MEMSUFFIX) (int src)
> -{
> -    for (; src < 32; src++, T0 += 4) {
> -        glue(stl, MEMSUFFIX)((uint64_t)T0, env->gpr[src]);
> +        glue(stl, MEMSUFFIX)(T0, env->gpr[src]);
>      }
>  }
> -#endif
>  
>  void glue(do_lmw_le, MEMSUFFIX) (int dst)
>  {
>      for (; dst < 32; dst++, T0 += 4) {
> -        env->gpr[dst] = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
> -    }
> -}
> -
> -#if defined(TARGET_PPC64)
> -void glue(do_lmw_le_64, MEMSUFFIX) (int dst)
> -{
> -    for (; dst < 32; dst++, T0 += 4) {
> -        env->gpr[dst] = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
> +        env->gpr[dst] = glue(ldulr, MEMSUFFIX)(T0);
>      }
>  }
> -#endif
>  
>  void glue(do_stmw_le, MEMSUFFIX) (int src)
>  {
>      for (; src < 32; src++, T0 += 4) {
> -        glue(st32r, MEMSUFFIX)((uint32_t)T0, env->gpr[src]);
> -    }
> -}
> -
> -#if defined(TARGET_PPC64)
> -void glue(do_stmw_le_64, MEMSUFFIX) (int src)
> -{
> -    for (; src < 32; src++, T0 += 4) {
> -        glue(st32r, MEMSUFFIX)((uint64_t)T0, env->gpr[src]);
> +        glue(stlr, MEMSUFFIX)(T0, env->gpr[src]);
>      }
>  }
> -#endif
>  
>  void glue(do_lsw, MEMSUFFIX) (int dst)
>  {
> @@ -105,71 +53,33 @@ void glue(do_lsw, MEMSUFFIX) (int dst)
>      int sh;
>  
>      for (; T1 > 3; T1 -= 4, T0 += 4) {
> -        env->gpr[dst++] = glue(ldl, MEMSUFFIX)((uint32_t)T0);
> -        if (unlikely(dst == 32))
> -            dst = 0;
> -    }
> -    if (unlikely(T1 != 0)) {
> -        tmp = 0;
> -        for (sh = 24; T1 > 0; T1--, T0++, sh -= 8) {
> -            tmp |= glue(ldub, MEMSUFFIX)((uint32_t)T0) << sh;
> -        }
> -        env->gpr[dst] = tmp;
> -    }
> -}
> -
> -#if defined(TARGET_PPC64)
> -void glue(do_lsw_64, MEMSUFFIX) (int dst)
> -{
> -    uint32_t tmp;
> -    int sh;
> -
> -    for (; T1 > 3; T1 -= 4, T0 += 4) {
> -        env->gpr[dst++] = glue(ldl, MEMSUFFIX)((uint64_t)T0);
> +        env->gpr[dst++] = glue(ldul, MEMSUFFIX)(T0);
>          if (unlikely(dst == 32))
>              dst = 0;
>      }
>      if (unlikely(T1 != 0)) {
>          tmp = 0;
>          for (sh = 24; T1 > 0; T1--, T0++, sh -= 8) {
> -            tmp |= glue(ldub, MEMSUFFIX)((uint64_t)T0) << sh;
> +            tmp |= glue(ldub, MEMSUFFIX)(T0) << sh;
>          }
>          env->gpr[dst] = tmp;
>      }
>  }
> -#endif
>  
>  void glue(do_stsw, MEMSUFFIX) (int src)
>  {
>      int sh;
>  
>      for (; T1 > 3; T1 -= 4, T0 += 4) {
> -        glue(stl, MEMSUFFIX)((uint32_t)T0, env->gpr[src++]);
> -        if (unlikely(src == 32))
> -            src = 0;
> -    }
> -    if (unlikely(T1 != 0)) {
> -        for (sh = 24; T1 > 0; T1--, T0++, sh -= 8)
> -            glue(stb, MEMSUFFIX)((uint32_t)T0, (env->gpr[src] >> sh) & 0xFF);
> -    }
> -}
> -
> -#if defined(TARGET_PPC64)
> -void glue(do_stsw_64, MEMSUFFIX) (int src)
> -{
> -    int sh;
> -
> -    for (; T1 > 3; T1 -= 4, T0 += 4) {
> -        glue(stl, MEMSUFFIX)((uint64_t)T0, env->gpr[src++]);
> +        glue(stl, MEMSUFFIX)(T0, env->gpr[src++]);
>          if (unlikely(src == 32))
>              src = 0;
>      }
>      if (unlikely(T1 != 0)) {
>          for (sh = 24; T1 > 0; T1--, T0++, sh -= 8)
> -            glue(stb, MEMSUFFIX)((uint64_t)T0, (env->gpr[src] >> sh) & 0xFF);
> +            glue(stb, MEMSUFFIX)(T0, (env->gpr[src] >> sh) & 0xFF);
>      }
>  }
> -#endif
>  
>  void glue(do_lsw_le, MEMSUFFIX) (int dst)
>  {
> @@ -177,71 +87,33 @@ void glue(do_lsw_le, MEMSUFFIX) (int dst
>      int sh;
>  
>      for (; T1 > 3; T1 -= 4, T0 += 4) {
> -        env->gpr[dst++] = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
> -        if (unlikely(dst == 32))
> -            dst = 0;
> -    }
> -    if (unlikely(T1 != 0)) {
> -        tmp = 0;
> -        for (sh = 0; T1 > 0; T1--, T0++, sh += 8) {
> -            tmp |= glue(ldub, MEMSUFFIX)((uint32_t)T0) << sh;
> -        }
> -        env->gpr[dst] = tmp;
> -    }
> -}
> -
> -#if defined(TARGET_PPC64)
> -void glue(do_lsw_le_64, MEMSUFFIX) (int dst)
> -{
> -    uint32_t tmp;
> -    int sh;
> -
> -    for (; T1 > 3; T1 -= 4, T0 += 4) {
> -        env->gpr[dst++] = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
> +        env->gpr[dst++] = glue(ldulr, MEMSUFFIX)(T0);
>          if (unlikely(dst == 32))
>              dst = 0;
>      }
>      if (unlikely(T1 != 0)) {
>          tmp = 0;
>          for (sh = 0; T1 > 0; T1--, T0++, sh += 8) {
> -            tmp |= glue(ldub, MEMSUFFIX)((uint64_t)T0) << sh;
> +            tmp |= glue(ldub, MEMSUFFIX)(T0) << sh;
>          }
>          env->gpr[dst] = tmp;
>      }
>  }
> -#endif
>  
>  void glue(do_stsw_le, MEMSUFFIX) (int src)
>  {
>      int sh;
>  
>      for (; T1 > 3; T1 -= 4, T0 += 4) {
> -        glue(st32r, MEMSUFFIX)((uint32_t)T0, env->gpr[src++]);
> -        if (unlikely(src == 32))
> -            src = 0;
> -    }
> -    if (unlikely(T1 != 0)) {
> -        for (sh = 0; T1 > 0; T1--, T0++, sh += 8)
> -            glue(stb, MEMSUFFIX)((uint32_t)T0, (env->gpr[src] >> sh) & 0xFF);
> -    }
> -}
> -
> -#if defined(TARGET_PPC64)
> -void glue(do_stsw_le_64, MEMSUFFIX) (int src)
> -{
> -    int sh;
> -
> -    for (; T1 > 3; T1 -= 4, T0 += 4) {
> -        glue(st32r, MEMSUFFIX)((uint64_t)T0, env->gpr[src++]);
> +        glue(stlr, MEMSUFFIX)(T0, env->gpr[src++]);
>          if (unlikely(src == 32))
>              src = 0;
>      }
>      if (unlikely(T1 != 0)) {
>          for (sh = 0; T1 > 0; T1--, T0++, sh += 8)
> -            glue(stb, MEMSUFFIX)((uint64_t)T0, (env->gpr[src] >> sh) & 0xFF);
> +            glue(stb, MEMSUFFIX)(T0, (env->gpr[src] >> sh) & 0xFF);
>      }
>  }
> -#endif
>  
>  /* Instruction cache invalidation helper */
>  void glue(do_icbi, MEMSUFFIX) (void)
> @@ -252,27 +124,11 @@ void glue(do_icbi, MEMSUFFIX) (void)
>       * (not a fetch) by the MMU. To be sure it will be so,
>       * do the load "by hand".
>       */
> -    tmp = glue(ldl, MEMSUFFIX)((uint32_t)T0);
> -    T0 &= ~(env->icache_line_size - 1);
> -    tb_invalidate_page_range((uint32_t)T0,
> -                             (uint32_t)(T0 + env->icache_line_size));
> -}
> -
> -#if defined(TARGET_PPC64)
> -void glue(do_icbi_64, MEMSUFFIX) (void)
> -{
> -    uint64_t tmp;
> -    /* Invalidate one cache line :
> -     * PowerPC specification says this is to be treated like a load
> -     * (not a fetch) by the MMU. To be sure it will be so,
> -     * do the load "by hand".
> -     */
> -    tmp = glue(ldq, MEMSUFFIX)((uint64_t)T0);
> +    tmp = glue(ldul, MEMSUFFIX)(T0);
>      T0 &= ~(env->icache_line_size - 1);
> -    tb_invalidate_page_range((uint64_t)T0,
> -                             (uint64_t)(T0 + env->icache_line_size));
> +    /* We assume it would not wrap around 2^32 on 32 bits targets */
> +    tb_invalidate_page_range(T0, T0 + env->icache_line_size);
>  }
> -#endif
>  
>  void glue(do_dcbz, MEMSUFFIX) (void)
>  {
> @@ -281,90 +137,43 @@ void glue(do_dcbz, MEMSUFFIX) (void)
>      /* XXX: should be 970 specific (?) */
>      if (((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1)
>          dcache_line_size = 32;
> -    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x00), 0);
> -    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x04), 0);
> -    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x08), 0);
> -    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x0C), 0);
> -    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x10), 0);
> -    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x14), 0);
> -    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x18), 0);
> -    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x1C), 0);
> -    if (dcache_line_size >= 64) {
> -        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x20UL), 0);
> -        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x24UL), 0);
> -        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x28UL), 0);
> -        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x2CUL), 0);
> -        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x30UL), 0);
> -        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x34UL), 0);
> -        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x38UL), 0);
> -        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x3CUL), 0);
> -        if (dcache_line_size >= 128) {
> -            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x40UL), 0);
> -            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x44UL), 0);
> -            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x48UL), 0);
> -            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x4CUL), 0);
> -            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x50UL), 0);
> -            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x54UL), 0);
> -            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x58UL), 0);
> -            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x5CUL), 0);
> -            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x60UL), 0);
> -            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x64UL), 0);
> -            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x68UL), 0);
> -            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x6CUL), 0);
> -            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x70UL), 0);
> -            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x74UL), 0);
> -            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x78UL), 0);
> -            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x7CUL), 0);
> -        }
> -    }
> -}
> -
> -#if defined(TARGET_PPC64)
> -void glue(do_dcbz_64, MEMSUFFIX) (void)
> -{
> -    int dcache_line_size = env->dcache_line_size;
> -
> -    /* XXX: should be 970 specific (?) */
> -    if (((env->spr[SPR_970_HID5] >> 6) & 0x3) == 0x2)
> -        dcache_line_size = 32;
> -    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x00), 0);
> -    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x04), 0);
> -    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x08), 0);
> -    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x0C), 0);
> -    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x10), 0);
> -    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x14), 0);
> -    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x18), 0);
> -    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x1C), 0);
> +    glue(stl, MEMSUFFIX)(T0 + 0x00, 0);
> +    glue(stl, MEMSUFFIX)(T0 + 0x04, 0);
> +    glue(stl, MEMSUFFIX)(T0 + 0x08, 0);
> +    glue(stl, MEMSUFFIX)(T0 + 0x0C, 0);
> +    glue(stl, MEMSUFFIX)(T0 + 0x10, 0);
> +    glue(stl, MEMSUFFIX)(T0 + 0x14, 0);
> +    glue(stl, MEMSUFFIX)(T0 + 0x18, 0);
> +    glue(stl, MEMSUFFIX)(T0 + 0x1C, 0);
>      if (dcache_line_size >= 64) {
> -        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x20UL), 0);
> -        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x24UL), 0);
> -        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x28UL), 0);
> -        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x2CUL), 0);
> -        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x30UL), 0);
> -        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x34UL), 0);
> -        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x38UL), 0);
> -        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x3CUL), 0);
> +        glue(stl, MEMSUFFIX)(T0 + 0x20UL, 0);
> +        glue(stl, MEMSUFFIX)(T0 + 0x24UL, 0);
> +        glue(stl, MEMSUFFIX)(T0 + 0x28UL, 0);
> +        glue(stl, MEMSUFFIX)(T0 + 0x2CUL, 0);
> +        glue(stl, MEMSUFFIX)(T0 + 0x30UL, 0);
> +        glue(stl, MEMSUFFIX)(T0 + 0x34UL, 0);
> +        glue(stl, MEMSUFFIX)(T0 + 0x38UL, 0);
> +        glue(stl, MEMSUFFIX)(T0 + 0x3CUL, 0);
>          if (dcache_line_size >= 128) {
> -            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x40UL), 0);
> -            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x44UL), 0);
> -            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x48UL), 0);
> -            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x4CUL), 0);
> -            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x50UL), 0);
> -            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x54UL), 0);
> -            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x58UL), 0);
> -            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x5CUL), 0);
> -            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x60UL), 0);
> -            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x64UL), 0);
> -            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x68UL), 0);
> -            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x6CUL), 0);
> -            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x70UL), 0);
> -            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x74UL), 0);
> -            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x78UL), 0);
> -            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x7CUL), 0);
> +            glue(stl, MEMSUFFIX)(T0 + 0x40UL, 0);
> +            glue(stl, MEMSUFFIX)(T0 + 0x44UL, 0);
> +            glue(stl, MEMSUFFIX)(T0 + 0x48UL, 0);
> +            glue(stl, MEMSUFFIX)(T0 + 0x4CUL, 0);
> +            glue(stl, MEMSUFFIX)(T0 + 0x50UL, 0);
> +            glue(stl, MEMSUFFIX)(T0 + 0x54UL, 0);
> +            glue(stl, MEMSUFFIX)(T0 + 0x58UL, 0);
> +            glue(stl, MEMSUFFIX)(T0 + 0x5CUL, 0);
> +            glue(stl, MEMSUFFIX)(T0 + 0x60UL, 0);
> +            glue(stl, MEMSUFFIX)(T0 + 0x64UL, 0);
> +            glue(stl, MEMSUFFIX)(T0 + 0x68UL, 0);
> +            glue(stl, MEMSUFFIX)(T0 + 0x6CUL, 0);
> +            glue(stl, MEMSUFFIX)(T0 + 0x70UL, 0);
> +            glue(stl, MEMSUFFIX)(T0 + 0x74UL, 0);
> +            glue(stl, MEMSUFFIX)(T0 + 0x78UL, 0);
> +            glue(stl, MEMSUFFIX)(T0 + 0x7CUL, 0);
>          }
>      }
>  }
> -#endif
>  
>  /* PowerPC 601 specific instructions (POWER bridge) */
>  // XXX: to be tested
> @@ -400,26 +209,6 @@ void glue(do_POWER2_lfq, MEMSUFFIX) (voi
>      FT1 = glue(ldfq, MEMSUFFIX)((uint32_t)(T0 + 4));
>  }
>  
> -static always_inline double glue(ldfqr, MEMSUFFIX) (target_ulong EA)
> -{
> -    union {
> -        double d;
> -        uint64_t u;
> -    } u;
> -
> -    u.d = glue(ldfq, MEMSUFFIX)(EA);
> -    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
> -        ((u.u & 0x00FF000000000000ULL) >> 40) |
> -        ((u.u & 0x0000FF0000000000ULL) >> 24) |
> -        ((u.u & 0x000000FF00000000ULL) >> 8) |
> -        ((u.u & 0x00000000FF000000ULL) << 8) |
> -        ((u.u & 0x0000000000FF0000ULL) << 24) |
> -        ((u.u & 0x000000000000FF00ULL) << 40) |
> -        ((u.u & 0x00000000000000FFULL) << 56);
> -
> -    return u.d;
> -}
> -
>  void glue(do_POWER2_lfq_le, MEMSUFFIX) (void)
>  {
>      FT0 = glue(ldfqr, MEMSUFFIX)((uint32_t)(T0 + 4));
> @@ -432,25 +221,6 @@ void glue(do_POWER2_stfq, MEMSUFFIX) (vo
>      glue(stfq, MEMSUFFIX)((uint32_t)(T0 + 4), FT1);
>  }
>  
> -static always_inline void glue(stfqr, MEMSUFFIX) (target_ulong EA, double d)
> -{
> -    union {
> -        double d;
> -        uint64_t u;
> -    } u;
> -
> -    u.d = d;
> -    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
> -        ((u.u & 0x00FF000000000000ULL) >> 40) |
> -        ((u.u & 0x0000FF0000000000ULL) >> 24) |
> -        ((u.u & 0x000000FF00000000ULL) >> 8) |
> -        ((u.u & 0x00000000FF000000ULL) << 8) |
> -        ((u.u & 0x0000000000FF0000ULL) << 24) |
> -        ((u.u & 0x000000000000FF00ULL) << 40) |
> -        ((u.u & 0x00000000000000FFULL) << 56);
> -    glue(stfq, MEMSUFFIX)(EA, u.d);
> -}
> -
>  void glue(do_POWER2_stfq_le, MEMSUFFIX) (void)
>  {
>      glue(stfqr, MEMSUFFIX)((uint32_t)(T0 + 4), FT0);
> Index: target-ppc/op_mem.h
> ===================================================================
> RCS file: /sources/qemu/qemu/target-ppc/op_mem.h,v
> retrieving revision 1.22
> diff -u -d -d -p -r1.22 op_mem.h
> --- target-ppc/op_mem.h	7 Oct 2007 18:19:25 -0000	1.22
> +++ target-ppc/op_mem.h	16 Oct 2007 11:39:08 -0000
> @@ -18,85 +18,6 @@
>   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
>   */
>  
> -static always_inline uint16_t glue(ld16r, MEMSUFFIX) (target_ulong EA)
> -{
> -    uint16_t tmp = glue(lduw, MEMSUFFIX)(EA);
> -    return ((tmp & 0xFF00) >> 8) | ((tmp & 0x00FF) << 8);
> -}
> -
> -static always_inline int32_t glue(ld16rs, MEMSUFFIX) (target_ulong EA)
> -{
> -    int16_t tmp = glue(lduw, MEMSUFFIX)(EA);
> -    return (int16_t)((tmp & 0xFF00) >> 8) | ((tmp & 0x00FF) << 8);
> -}
> -
> -static always_inline uint32_t glue(ld32r, MEMSUFFIX) (target_ulong EA)
> -{
> -    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
> -    return ((tmp & 0xFF000000) >> 24) | ((tmp & 0x00FF0000) >> 8) |
> -        ((tmp & 0x0000FF00) << 8) | ((tmp & 0x000000FF) << 24);
> -}
> -
> -#if defined(TARGET_PPC64) || defined(TARGET_PPCEMB)
> -static always_inline uint64_t glue(ld64r, MEMSUFFIX) (target_ulong EA)
> -{
> -    uint64_t tmp = glue(ldq, MEMSUFFIX)(EA);
> -    return ((tmp & 0xFF00000000000000ULL) >> 56) |
> -        ((tmp & 0x00FF000000000000ULL) >> 40) |
> -        ((tmp & 0x0000FF0000000000ULL) >> 24) |
> -        ((tmp & 0x000000FF00000000ULL) >> 8) |
> -        ((tmp & 0x00000000FF000000ULL) << 8) |
> -        ((tmp & 0x0000000000FF0000ULL) << 24) |
> -        ((tmp & 0x000000000000FF00ULL) << 40) |
> -        ((tmp & 0x00000000000000FFULL) << 54);
> -}
> -#endif
> -
> -#if defined(TARGET_PPC64)
> -static always_inline int64_t glue(ldsl, MEMSUFFIX) (target_ulong EA)
> -{
> -    return (int32_t)glue(ldl, MEMSUFFIX)(EA);
> -}
> -
> -static always_inline int64_t glue(ld32rs, MEMSUFFIX) (target_ulong EA)
> -{
> -    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
> -    return (int32_t)((tmp & 0xFF000000) >> 24) | ((tmp & 0x00FF0000) >> 8) |
> -        ((tmp & 0x0000FF00) << 8) | ((tmp & 0x000000FF) << 24);
> -}
> -#endif
> -
> -static always_inline void glue(st16r, MEMSUFFIX) (target_ulong EA,
> -                                                  uint16_t data)
> -{
> -    uint16_t tmp = ((data & 0xFF00) >> 8) | ((data & 0x00FF) << 8);
> -    glue(stw, MEMSUFFIX)(EA, tmp);
> -}
> -
> -static always_inline void glue(st32r, MEMSUFFIX) (target_ulong EA,
> -                                                  uint32_t data)
> -{
> -    uint32_t tmp = ((data & 0xFF000000) >> 24) | ((data & 0x00FF0000) >> 8) |
> -        ((data & 0x0000FF00) << 8) | ((data & 0x000000FF) << 24);
> -    glue(stl, MEMSUFFIX)(EA, tmp);
> -}
> -
> -#if defined(TARGET_PPC64) || defined(TARGET_PPCEMB)
> -static always_inline void glue(st64r, MEMSUFFIX) (target_ulong EA,
> -                                                  uint64_t data)
> -{
> -    uint64_t tmp = ((data & 0xFF00000000000000ULL) >> 56) |
> -        ((data & 0x00FF000000000000ULL) >> 40) |
> -        ((data & 0x0000FF0000000000ULL) >> 24) |
> -        ((data & 0x000000FF00000000ULL) >> 8) |
> -        ((data & 0x00000000FF000000ULL) << 8) |
> -        ((data & 0x0000000000FF0000ULL) << 24) |
> -        ((data & 0x000000000000FF00ULL) << 40) |
> -        ((data & 0x00000000000000FFULL) << 56);
> -    glue(stq, MEMSUFFIX)(EA, tmp);
> -}
> -#endif
> -
>  /***                             Integer load                              ***/
>  #define PPC_LD_OP(name, op)                                                   \
>  void OPPROTO glue(glue(op_l, name), MEMSUFFIX) (void)                         \
> @@ -130,35 +51,37 @@ void OPPROTO glue(glue(glue(op_st, name)
>  }
>  #endif
>  
> +/* Native-endian fixed-point memory loads                                    */
>  PPC_LD_OP(bz, ldub);
>  PPC_LD_OP(ha, ldsw);
>  PPC_LD_OP(hz, lduw);
> -PPC_LD_OP(wz, ldl);
> +PPC_LD_OP(wz, ldul);
>  #if defined(TARGET_PPC64)
> -PPC_LD_OP(d, ldq);
> +PPC_LD_OP(d, lduq);
>  PPC_LD_OP(wa, ldsl);
> -PPC_LD_OP_64(d, ldq);
> +PPC_LD_OP_64(d, lduq);
>  PPC_LD_OP_64(wa, ldsl);
>  PPC_LD_OP_64(bz, ldub);
>  PPC_LD_OP_64(ha, ldsw);
>  PPC_LD_OP_64(hz, lduw);
> -PPC_LD_OP_64(wz, ldl);
> +PPC_LD_OP_64(wz, ldul);
>  #endif
>  
> -PPC_LD_OP(ha_le, ld16rs);
> -PPC_LD_OP(hz_le, ld16r);
> -PPC_LD_OP(wz_le, ld32r);
> +/* Reverse-endian fixed-point memory loads                                   */
> +PPC_LD_OP(ha_le, ldswr);
> +PPC_LD_OP(hz_le, lduwr);
> +PPC_LD_OP(wz_le, ldulr);
>  #if defined(TARGET_PPC64)
> -PPC_LD_OP(d_le, ld64r);
> -PPC_LD_OP(wa_le, ld32rs);
> -PPC_LD_OP_64(d_le, ld64r);
> -PPC_LD_OP_64(wa_le, ld32rs);
> -PPC_LD_OP_64(ha_le, ld16rs);
> -PPC_LD_OP_64(hz_le, ld16r);
> -PPC_LD_OP_64(wz_le, ld32r);
> +PPC_LD_OP(d_le, lduqr);
> +PPC_LD_OP(wa_le, ldslr);
> +PPC_LD_OP_64(d_le, lduqr);
> +PPC_LD_OP_64(wa_le, ldslr);
> +PPC_LD_OP_64(ha_le, ldswr);
> +PPC_LD_OP_64(hz_le, lduwr);
> +PPC_LD_OP_64(wz_le, ldulr);
>  #endif
>  
> -/***                              Integer store                            ***/
> +/* Native-endian fixed-point memory stores                                   */
>  PPC_ST_OP(b, stb);
>  PPC_ST_OP(h, stw);
>  PPC_ST_OP(w, stl);
> @@ -170,120 +93,110 @@ PPC_ST_OP_64(h, stw);
>  PPC_ST_OP_64(w, stl);
>  #endif
>  
> -PPC_ST_OP(h_le, st16r);
> -PPC_ST_OP(w_le, st32r);
> +/* Reverse-endian fixed-point memory stores                                  */
> +PPC_ST_OP(h_le, stwr);
> +PPC_ST_OP(w_le, stlr);
>  #if defined(TARGET_PPC64)
> -PPC_ST_OP(d_le, st64r);
> -PPC_ST_OP_64(d_le, st64r);
> -PPC_ST_OP_64(h_le, st16r);
> -PPC_ST_OP_64(w_le, st32r);
> +PPC_ST_OP(d_le, stqr);
> +PPC_ST_OP_64(d_le, stqr);
> +PPC_ST_OP_64(h_le, stwr);
> +PPC_ST_OP_64(w_le, stlr);
>  #endif
>  
> -/***                Integer load and store with byte reverse               ***/
> -PPC_LD_OP(hbr, ld16r);
> -PPC_LD_OP(wbr, ld32r);
> -PPC_ST_OP(hbr, st16r);
> -PPC_ST_OP(wbr, st32r);
> +/* Native-endian fixed-point loads and stores with byte-reverse              */
> +PPC_LD_OP(hbr, lduwr);
> +PPC_LD_OP(wbr, ldulr);
> +PPC_ST_OP(hbr, stwr);
> +PPC_ST_OP(wbr, stlr);
>  #if defined(TARGET_PPC64)
> -PPC_LD_OP_64(hbr, ld16r);
> -PPC_LD_OP_64(wbr, ld32r);
> -PPC_ST_OP_64(hbr, st16r);
> -PPC_ST_OP_64(wbr, st32r);
> +PPC_LD_OP_64(hbr, lduwr);
> +PPC_LD_OP_64(wbr, ldulr);
> +PPC_ST_OP_64(hbr, stwr);
> +PPC_ST_OP_64(wbr, stlr);
>  #endif
>  
> +/* Reverse-endian fixed-point loads and stores with byte-reverse             */
>  PPC_LD_OP(hbr_le, lduw);
> -PPC_LD_OP(wbr_le, ldl);
> +PPC_LD_OP(wbr_le, ldul);
>  PPC_ST_OP(hbr_le, stw);
>  PPC_ST_OP(wbr_le, stl);
>  #if defined(TARGET_PPC64)
>  PPC_LD_OP_64(hbr_le, lduw);
> -PPC_LD_OP_64(wbr_le, ldl);
> +PPC_LD_OP_64(wbr_le, ldul);
>  PPC_ST_OP_64(hbr_le, stw);
>  PPC_ST_OP_64(wbr_le, stl);
>  #endif
>  
> -/***                    Integer load and store multiple                    ***/
> +/* Native-endian fixed-point loads and stores multiple                       */
>  void OPPROTO glue(op_lmw, MEMSUFFIX) (void)
>  {
> +    T0 = (uint32_t)T0;
>      glue(do_lmw, MEMSUFFIX)(PARAM1);
>      RETURN();
>  }
>  
> -#if defined(TARGET_PPC64)
> -void OPPROTO glue(op_lmw_64, MEMSUFFIX) (void)
> -{
> -    glue(do_lmw_64, MEMSUFFIX)(PARAM1);
> -    RETURN();
> -}
> -#endif
> -
> -void OPPROTO glue(op_lmw_le, MEMSUFFIX) (void)
> +void OPPROTO glue(op_stmw, MEMSUFFIX) (void)
>  {
> -    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
> +    T0 = (uint32_t)T0;
> +    glue(do_stmw, MEMSUFFIX)(PARAM1);
>      RETURN();
>  }
>  
>  #if defined(TARGET_PPC64)
> -void OPPROTO glue(op_lmw_le_64, MEMSUFFIX) (void)
> +void OPPROTO glue(op_lmw_64, MEMSUFFIX) (void)
>  {
> -    glue(do_lmw_le_64, MEMSUFFIX)(PARAM1);
> +    glue(do_lmw, MEMSUFFIX)(PARAM1);
>      RETURN();
>  }
> -#endif
>  
> -void OPPROTO glue(op_stmw, MEMSUFFIX) (void)
> +void OPPROTO glue(op_stmw_64, MEMSUFFIX) (void)
>  {
>      glue(do_stmw, MEMSUFFIX)(PARAM1);
>      RETURN();
>  }
> +#endif
>  
> -#if defined(TARGET_PPC64)
> -void OPPROTO glue(op_stmw_64, MEMSUFFIX) (void)
> +/* Reverse-endian fixed-point loads and stores multiple                      */
> +void OPPROTO glue(op_lmw_le, MEMSUFFIX) (void)
>  {
> -    glue(do_stmw_64, MEMSUFFIX)(PARAM1);
> +    T0 = (uint32_t)T0;
> +    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
>      RETURN();
>  }
> -#endif
>  
>  void OPPROTO glue(op_stmw_le, MEMSUFFIX) (void)
>  {
> +    T0 = (uint32_t)T0;
>      glue(do_stmw_le, MEMSUFFIX)(PARAM1);
>      RETURN();
>  }
>  
>  #if defined(TARGET_PPC64)
> -void OPPROTO glue(op_stmw_le_64, MEMSUFFIX) (void)
> -{
> -    glue(do_stmw_le_64, MEMSUFFIX)(PARAM1);
> -    RETURN();
> -}
> -#endif
> -
> -/***                    Integer load and store strings                     ***/
> -void OPPROTO glue(op_lswi, MEMSUFFIX) (void)
> +void OPPROTO glue(op_lmw_le_64, MEMSUFFIX) (void)
>  {
> -    glue(do_lsw, MEMSUFFIX)(PARAM1);
> +    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
>      RETURN();
>  }
>  
> -#if defined(TARGET_PPC64)
> -void OPPROTO glue(op_lswi_64, MEMSUFFIX) (void)
> +void OPPROTO glue(op_stmw_le_64, MEMSUFFIX) (void)
>  {
> -    glue(do_lsw_64, MEMSUFFIX)(PARAM1);
> +    glue(do_stmw_le, MEMSUFFIX)(PARAM1);
>      RETURN();
>  }
>  #endif
>  
> -void OPPROTO glue(op_lswi_le, MEMSUFFIX) (void)
> +/* Native-endian loads and stores string                                     */
> +void OPPROTO glue(op_lswi, MEMSUFFIX) (void)
>  {
> -    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
> +    glue(do_lsw, MEMSUFFIX)(PARAM1);
>      RETURN();
>  }
>  
>  #if defined(TARGET_PPC64)
> -void OPPROTO glue(op_lswi_le_64, MEMSUFFIX) (void)
> +void OPPROTO glue(op_lswi_64, MEMSUFFIX) (void)
>  {
> -    glue(do_lsw_le_64, MEMSUFFIX)(PARAM1);
> +    T0 = (uint32_t)T0;
> +    glue(do_lsw, MEMSUFFIX)(PARAM1);
>      RETURN();
>  }
>  #endif
> @@ -303,6 +216,7 @@ void OPPROTO glue(op_lswx, MEMSUFFIX) (v
>                                     POWERPC_EXCP_INVAL |
>                                     POWERPC_EXCP_INVAL_LSWX);
>          } else {
> +            T0 = (uint32_t)T0;
>              glue(do_lsw, MEMSUFFIX)(PARAM1);
>          }
>      }
> @@ -320,13 +234,44 @@ void OPPROTO glue(op_lswx_64, MEMSUFFIX)
>                                     POWERPC_EXCP_INVAL |
>                                     POWERPC_EXCP_INVAL_LSWX);
>          } else {
> -            glue(do_lsw_64, MEMSUFFIX)(PARAM1);
> +            glue(do_lsw, MEMSUFFIX)(PARAM1);
>          }
>      }
>      RETURN();
>  }
>  #endif
>  
> +void OPPROTO glue(op_stsw, MEMSUFFIX) (void)
> +{
> +    T0 = (uint32_t)T0;
> +    glue(do_stsw, MEMSUFFIX)(PARAM1);
> +    RETURN();
> +}
> +
> +#if defined(TARGET_PPC64)
> +void OPPROTO glue(op_stsw_64, MEMSUFFIX) (void)
> +{
> +    glue(do_stsw, MEMSUFFIX)(PARAM1);
> +    RETURN();
> +}
> +#endif
> +
> +/* Reverse-endian loads and stores string                                    */
> +void OPPROTO glue(op_lswi_le, MEMSUFFIX) (void)
> +{
> +    T0 = (uint32_t)T0;
> +    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
> +    RETURN();
> +}
> +
> +#if defined(TARGET_PPC64)
> +void OPPROTO glue(op_lswi_le_64, MEMSUFFIX) (void)
> +{
> +    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
> +    RETURN();
> +}
> +#endif
> +
>  void OPPROTO glue(op_lswx_le, MEMSUFFIX) (void)
>  {
>      /* Note: T1 comes from xer_bc then no cast is needed */
> @@ -337,6 +282,7 @@ void OPPROTO glue(op_lswx_le, MEMSUFFIX)
>                                     POWERPC_EXCP_INVAL |
>                                     POWERPC_EXCP_INVAL_LSWX);
>          } else {
> +            T0 = (uint32_t)T0;
>              glue(do_lsw_le, MEMSUFFIX)(PARAM1);
>          }
>      }
> @@ -354,29 +300,16 @@ void OPPROTO glue(op_lswx_le_64, MEMSUFF
>                                     POWERPC_EXCP_INVAL |
>                                     POWERPC_EXCP_INVAL_LSWX);
>          } else {
> -            glue(do_lsw_le_64, MEMSUFFIX)(PARAM1);
> +            glue(do_lsw_le, MEMSUFFIX)(PARAM1);
>          }
>      }
>      RETURN();
>  }
>  #endif
>  
> -void OPPROTO glue(op_stsw, MEMSUFFIX) (void)
> -{
> -    glue(do_stsw, MEMSUFFIX)(PARAM1);
> -    RETURN();
> -}
> -
> -#if defined(TARGET_PPC64)
> -void OPPROTO glue(op_stsw_64, MEMSUFFIX) (void)
> -{
> -    glue(do_stsw_64, MEMSUFFIX)(PARAM1);
> -    RETURN();
> -}
> -#endif
> -
>  void OPPROTO glue(op_stsw_le, MEMSUFFIX) (void)
>  {
> +    T0 = (uint32_t)T0;
>      glue(do_stsw_le, MEMSUFFIX)(PARAM1);
>      RETURN();
>  }
> @@ -384,7 +317,7 @@ void OPPROTO glue(op_stsw_le, MEMSUFFIX)
>  #if defined(TARGET_PPC64)
>  void OPPROTO glue(op_stsw_le_64, MEMSUFFIX) (void)
>  {
> -    glue(do_stsw_le_64, MEMSUFFIX)(PARAM1);
> +    glue(do_stsw_le, MEMSUFFIX)(PARAM1);
>      RETURN();
>  }
>  #endif
> @@ -432,38 +365,9 @@ PPC_STF_OP_64(fs, stfs);
>  PPC_STF_OP_64(fiwx, stfiwx);
>  #endif
>  
> -static always_inline void glue(stfqr, MEMSUFFIX) (target_ulong EA, double d)
> -{
> -    union {
> -        double d;
> -        uint64_t u;
> -    } u;
> -
> -    u.d = d;
> -    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
> -        ((u.u & 0x00FF000000000000ULL) >> 40) |
> -        ((u.u & 0x0000FF0000000000ULL) >> 24) |
> -        ((u.u & 0x000000FF00000000ULL) >> 8) |
> -        ((u.u & 0x00000000FF000000ULL) << 8) |
> -        ((u.u & 0x0000000000FF0000ULL) << 24) |
> -        ((u.u & 0x000000000000FF00ULL) << 40) |
> -        ((u.u & 0x00000000000000FFULL) << 56);
> -    glue(stfq, MEMSUFFIX)(EA, u.d);
> -}
> -
>  static always_inline void glue(stfsr, MEMSUFFIX) (target_ulong EA, double d)
>  {
> -    union {
> -        float f;
> -        uint32_t u;
> -    } u;
> -
> -    u.f = float64_to_float32(d, &env->fp_status);
> -    u.u = ((u.u & 0xFF000000UL) >> 24) |
> -        ((u.u & 0x00FF0000ULL) >> 8) |
> -        ((u.u & 0x0000FF00UL) << 8) |
> -        ((u.u & 0x000000FFULL) << 24);
> -    glue(stfl, MEMSUFFIX)(EA, u.f);
> +    glue(stflr, MEMSUFFIX)(EA, float64_to_float32(d, &env->fp_status));
>  }
>  
>  static always_inline void glue(stfiwxr, MEMSUFFIX) (target_ulong EA, double d)
> @@ -475,11 +379,7 @@ static always_inline void glue(stfiwxr, 
>  
>      /* Store the low order 32 bits without any conversion */
>      u.d = d;
> -    u.u = ((u.u & 0xFF000000UL) >> 24) |
> -        ((u.u & 0x00FF0000ULL) >> 8) |
> -        ((u.u & 0x0000FF00UL) << 8) |
> -        ((u.u & 0x000000FFULL) << 24);
> -    glue(stl, MEMSUFFIX)(EA, u.u);
> +    glue(stlr, MEMSUFFIX)(EA, u.u);
>  }
>  
>  PPC_STF_OP(fd_le, stfqr);
> @@ -520,40 +420,9 @@ PPC_LDF_OP_64(fd, ldfq);
>  PPC_LDF_OP_64(fs, ldfs);
>  #endif
>  
> -static always_inline double glue(ldfqr, MEMSUFFIX) (target_ulong EA)
> -{
> -    union {
> -        double d;
> -        uint64_t u;
> -    } u;
> -
> -    u.d = glue(ldfq, MEMSUFFIX)(EA);
> -    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
> -        ((u.u & 0x00FF000000000000ULL) >> 40) |
> -        ((u.u & 0x0000FF0000000000ULL) >> 24) |
> -        ((u.u & 0x000000FF00000000ULL) >> 8) |
> -        ((u.u & 0x00000000FF000000ULL) << 8) |
> -        ((u.u & 0x0000000000FF0000ULL) << 24) |
> -        ((u.u & 0x000000000000FF00ULL) << 40) |
> -        ((u.u & 0x00000000000000FFULL) << 56);
> -
> -    return u.d;
> -}
> -
>  static always_inline double glue(ldfsr, MEMSUFFIX) (target_ulong EA)
>  {
> -    union {
> -        float f;
> -        uint32_t u;
> -    } u;
> -
> -    u.f = glue(ldfl, MEMSUFFIX)(EA);
> -    u.u = ((u.u & 0xFF000000UL) >> 24) |
> -        ((u.u & 0x00FF0000ULL) >> 8) |
> -        ((u.u & 0x0000FF00UL) << 8) |
> -        ((u.u & 0x000000FFULL) << 24);
> -
> -    return float32_to_float64(u.f, &env->fp_status);
> +    return float32_to_float64(glue(ldflr, MEMSUFFIX)(EA), &env->fp_status);
>  }
>  
>  PPC_LDF_OP(fd_le, ldfqr);
> @@ -569,7 +438,7 @@ void OPPROTO glue(op_lwarx, MEMSUFFIX) (
>      if (unlikely(T0 & 0x03)) {
>          do_raise_exception(POWERPC_EXCP_ALIGN);
>      } else {
> -        T1 = glue(ldl, MEMSUFFIX)((uint32_t)T0);
> +        T1 = glue(ldul, MEMSUFFIX)((uint32_t)T0);
>          env->reserve = (uint32_t)T0;
>      }
>      RETURN();
> @@ -581,7 +450,7 @@ void OPPROTO glue(op_lwarx_64, MEMSUFFIX
>      if (unlikely(T0 & 0x03)) {
>          do_raise_exception(POWERPC_EXCP_ALIGN);
>      } else {
> -        T1 = glue(ldl, MEMSUFFIX)((uint64_t)T0);
> +        T1 = glue(ldul, MEMSUFFIX)((uint64_t)T0);
>          env->reserve = (uint64_t)T0;
>      }
>      RETURN();
> @@ -592,7 +461,7 @@ void OPPROTO glue(op_ldarx, MEMSUFFIX) (
>      if (unlikely(T0 & 0x03)) {
>          do_raise_exception(POWERPC_EXCP_ALIGN);
>      } else {
> -        T1 = glue(ldq, MEMSUFFIX)((uint32_t)T0);
> +        T1 = glue(lduq, MEMSUFFIX)((uint32_t)T0);
>          env->reserve = (uint32_t)T0;
>      }
>      RETURN();
> @@ -603,7 +472,7 @@ void OPPROTO glue(op_ldarx_64, MEMSUFFIX
>      if (unlikely(T0 & 0x03)) {
>          do_raise_exception(POWERPC_EXCP_ALIGN);
>      } else {
> -        T1 = glue(ldq, MEMSUFFIX)((uint64_t)T0);
> +        T1 = glue(lduq, MEMSUFFIX)((uint64_t)T0);
>          env->reserve = (uint64_t)T0;
>      }
>      RETURN();
> @@ -615,7 +484,7 @@ void OPPROTO glue(op_lwarx_le, MEMSUFFIX
>      if (unlikely(T0 & 0x03)) {
>          do_raise_exception(POWERPC_EXCP_ALIGN);
>      } else {
> -        T1 = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
> +        T1 = glue(ldulr, MEMSUFFIX)((uint32_t)T0);
>          env->reserve = (uint32_t)T0;
>      }
>      RETURN();
> @@ -627,7 +496,7 @@ void OPPROTO glue(op_lwarx_le_64, MEMSUF
>      if (unlikely(T0 & 0x03)) {
>          do_raise_exception(POWERPC_EXCP_ALIGN);
>      } else {
> -        T1 = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
> +        T1 = glue(ldulr, MEMSUFFIX)((uint64_t)T0);
>          env->reserve = (uint64_t)T0;
>      }
>      RETURN();
> @@ -638,7 +507,7 @@ void OPPROTO glue(op_ldarx_le, MEMSUFFIX
>      if (unlikely(T0 & 0x03)) {
>          do_raise_exception(POWERPC_EXCP_ALIGN);
>      } else {
> -        T1 = glue(ld64r, MEMSUFFIX)((uint32_t)T0);
> +        T1 = glue(lduqr, MEMSUFFIX)((uint32_t)T0);
>          env->reserve = (uint32_t)T0;
>      }
>      RETURN();
> @@ -649,7 +518,7 @@ void OPPROTO glue(op_ldarx_le_64, MEMSUF
>      if (unlikely(T0 & 0x03)) {
>          do_raise_exception(POWERPC_EXCP_ALIGN);
>      } else {
> -        T1 = glue(ld64r, MEMSUFFIX)((uint64_t)T0);
> +        T1 = glue(lduqr, MEMSUFFIX)((uint64_t)T0);
>          env->reserve = (uint64_t)T0;
>      }
>      RETURN();
> @@ -731,7 +600,7 @@ void OPPROTO glue(op_stwcx_le, MEMSUFFIX
>          if (unlikely(env->reserve != (uint32_t)T0)) {
>              env->crf[0] = xer_so;
>          } else {
> -            glue(st32r, MEMSUFFIX)((uint32_t)T0, T1);
> +            glue(stlr, MEMSUFFIX)((uint32_t)T0, T1);
>              env->crf[0] = xer_so | 0x02;
>          }
>      }
> @@ -748,7 +617,7 @@ void OPPROTO glue(op_stwcx_le_64, MEMSUF
>          if (unlikely(env->reserve != (uint64_t)T0)) {
>              env->crf[0] = xer_so;
>          } else {
> -            glue(st32r, MEMSUFFIX)((uint64_t)T0, T1);
> +            glue(stlr, MEMSUFFIX)((uint64_t)T0, T1);
>              env->crf[0] = xer_so | 0x02;
>          }
>      }
> @@ -764,7 +633,7 @@ void OPPROTO glue(op_stdcx_le, MEMSUFFIX
>          if (unlikely(env->reserve != (uint32_t)T0)) {
>              env->crf[0] = xer_so;
>          } else {
> -            glue(st64r, MEMSUFFIX)((uint32_t)T0, T1);
> +            glue(stqr, MEMSUFFIX)((uint32_t)T0, T1);
>              env->crf[0] = xer_so | 0x02;
>          }
>      }
> @@ -780,7 +649,7 @@ void OPPROTO glue(op_stdcx_le_64, MEMSUF
>          if (unlikely(env->reserve != (uint64_t)T0)) {
>              env->crf[0] = xer_so;
>          } else {
> -            glue(st64r, MEMSUFFIX)((uint64_t)T0, T1);
> +            glue(stqr, MEMSUFFIX)((uint64_t)T0, T1);
>              env->crf[0] = xer_so | 0x02;
>          }
>      }
> @@ -862,6 +731,7 @@ void OPPROTO glue(op_dcbz_l128, MEMSUFFI
>  
>  void OPPROTO glue(op_dcbz, MEMSUFFIX) (void)
>  {
> +    T0 = (uint32_t)T0;
>      glue(do_dcbz, MEMSUFFIX)();
>      RETURN();
>  }
> @@ -940,7 +810,7 @@ void OPPROTO glue(op_dcbz_l128_64, MEMSU
>  
>  void OPPROTO glue(op_dcbz_64, MEMSUFFIX) (void)
>  {
> -    glue(do_dcbz_64, MEMSUFFIX)();
> +    glue(do_dcbz, MEMSUFFIX)();
>      RETURN();
>  }
>  #endif
> @@ -948,6 +818,7 @@ void OPPROTO glue(op_dcbz_64, MEMSUFFIX)
>  /* Instruction cache block invalidate */
>  void OPPROTO glue(op_icbi, MEMSUFFIX) (void)
>  {
> +    T0 = (uint32_t)T0;
>      glue(do_icbi, MEMSUFFIX)();
>      RETURN();
>  }
> @@ -955,7 +826,7 @@ void OPPROTO glue(op_icbi, MEMSUFFIX) (v
>  #if defined(TARGET_PPC64)
>  void OPPROTO glue(op_icbi_64, MEMSUFFIX) (void)
>  {
> -    glue(do_icbi_64, MEMSUFFIX)();
> +    glue(do_icbi, MEMSUFFIX)();
>      RETURN();
>  }
>  #endif
> @@ -963,14 +834,14 @@ void OPPROTO glue(op_icbi_64, MEMSUFFIX)
>  /* External access */
>  void OPPROTO glue(op_eciwx, MEMSUFFIX) (void)
>  {
> -    T1 = glue(ldl, MEMSUFFIX)((uint32_t)T0);
> +    T1 = glue(ldul, MEMSUFFIX)((uint32_t)T0);
>      RETURN();
>  }
>  
>  #if defined(TARGET_PPC64)
>  void OPPROTO glue(op_eciwx_64, MEMSUFFIX) (void)
>  {
> -    T1 = glue(ldl, MEMSUFFIX)((uint64_t)T0);
> +    T1 = glue(ldul, MEMSUFFIX)((uint64_t)T0);
>      RETURN();
>  }
>  #endif
> @@ -991,28 +862,28 @@ void OPPROTO glue(op_ecowx_64, MEMSUFFIX
>  
>  void OPPROTO glue(op_eciwx_le, MEMSUFFIX) (void)
>  {
> -    T1 = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
> +    T1 = glue(ldulr, MEMSUFFIX)((uint32_t)T0);
>      RETURN();
>  }
>  
>  #if defined(TARGET_PPC64)
>  void OPPROTO glue(op_eciwx_le_64, MEMSUFFIX) (void)
>  {
> -    T1 = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
> +    T1 = glue(ldulr, MEMSUFFIX)((uint64_t)T0);
>      RETURN();
>  }
>  #endif
>  
>  void OPPROTO glue(op_ecowx_le, MEMSUFFIX) (void)
>  {
> -    glue(st32r, MEMSUFFIX)((uint32_t)T0, T1);
> +    glue(stlr, MEMSUFFIX)((uint32_t)T0, T1);
>      RETURN();
>  }
>  
>  #if defined(TARGET_PPC64)
>  void OPPROTO glue(op_ecowx_le_64, MEMSUFFIX) (void)
>  {
> -    glue(st32r, MEMSUFFIX)((uint64_t)T0, T1);
> +    glue(stlr, MEMSUFFIX)((uint64_t)T0, T1);
>      RETURN();
>  }
>  #endif
> @@ -1064,14 +935,14 @@ void OPPROTO glue(op_POWER2_stfq_le, MEM
>  #endif
>  void OPPROTO glue(op_vr_lvx, MEMSUFFIX) (void)
>  {
> -    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint32_t)T0);
> -    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint32_t)T0 + 8);
> +    AVR0.u64[VR_DWORD0] = glue(lduq, MEMSUFFIX)((uint32_t)T0);
> +    AVR0.u64[VR_DWORD1] = glue(lduq, MEMSUFFIX)((uint32_t)T0 + 8);
>  }
>  
>  void OPPROTO glue(op_vr_lvx_le, MEMSUFFIX) (void)
>  {
> -    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint32_t)T0);
> -    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint32_t)T0 + 8);
> +    AVR0.u64[VR_DWORD1] = glue(lduqr, MEMSUFFIX)((uint32_t)T0);
> +    AVR0.u64[VR_DWORD0] = glue(lduqr, MEMSUFFIX)((uint32_t)T0 + 8);
>  }
>  
>  void OPPROTO glue(op_vr_stvx, MEMSUFFIX) (void)
> @@ -1082,21 +953,21 @@ void OPPROTO glue(op_vr_stvx, MEMSUFFIX)
>  
>  void OPPROTO glue(op_vr_stvx_le, MEMSUFFIX) (void)
>  {
> -    glue(stq, MEMSUFFIX)((uint32_t)T0, AVR0.u64[VR_DWORD1]);
> -    glue(stq, MEMSUFFIX)((uint32_t)T0 + 8, AVR0.u64[VR_DWORD0]);
> +    glue(stqr, MEMSUFFIX)((uint32_t)T0, AVR0.u64[VR_DWORD1]);
> +    glue(stqr, MEMSUFFIX)((uint32_t)T0 + 8, AVR0.u64[VR_DWORD0]);
>  }
>  
>  #if defined(TARGET_PPC64)
>  void OPPROTO glue(op_vr_lvx_64, MEMSUFFIX) (void)
>  {
> -    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint64_t)T0);
> -    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint64_t)T0 + 8);
> +    AVR0.u64[VR_DWORD0] = glue(lduq, MEMSUFFIX)((uint64_t)T0);
> +    AVR0.u64[VR_DWORD1] = glue(lduq, MEMSUFFIX)((uint64_t)T0 + 8);
>  }
>  
>  void OPPROTO glue(op_vr_lvx_le_64, MEMSUFFIX) (void)
>  {
> -    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint64_t)T0);
> -    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint64_t)T0 + 8);
> +    AVR0.u64[VR_DWORD1] = glue(lduqr, MEMSUFFIX)((uint64_t)T0);
> +    AVR0.u64[VR_DWORD0] = glue(lduqr, MEMSUFFIX)((uint64_t)T0 + 8);
>  }
>  
>  void OPPROTO glue(op_vr_stvx_64, MEMSUFFIX) (void)
> @@ -1107,8 +978,8 @@ void OPPROTO glue(op_vr_stvx_64, MEMSUFF
>  
>  void OPPROTO glue(op_vr_stvx_le_64, MEMSUFFIX) (void)
>  {
> -    glue(stq, MEMSUFFIX)((uint64_t)T0, AVR0.u64[VR_DWORD1]);
> -    glue(stq, MEMSUFFIX)((uint64_t)T0 + 8, AVR0.u64[VR_DWORD0]);
> +    glue(stqr, MEMSUFFIX)((uint64_t)T0, AVR0.u64[VR_DWORD1]);
> +    glue(stqr, MEMSUFFIX)((uint64_t)T0 + 8, AVR0.u64[VR_DWORD0]);
>  }
>  #endif
>  #undef VR_DWORD0
> @@ -1161,16 +1032,16 @@ _PPC_SPE_ST_OP(name, op)
>  #endif
>  
>  #if !defined(TARGET_PPC64)
> -PPC_SPE_LD_OP(dd, ldq);
> +PPC_SPE_LD_OP(dd, lduq);
>  PPC_SPE_ST_OP(dd, stq);
> -PPC_SPE_LD_OP(dd_le, ld64r);
> -PPC_SPE_ST_OP(dd_le, st64r);
> +PPC_SPE_LD_OP(dd_le, lduqr);
> +PPC_SPE_ST_OP(dd_le, stqr);
>  #endif
>  static always_inline uint64_t glue(spe_ldw, MEMSUFFIX) (target_ulong EA)
>  {
>      uint64_t ret;
> -    ret = (uint64_t)glue(ldl, MEMSUFFIX)(EA) << 32;
> -    ret |= (uint64_t)glue(ldl, MEMSUFFIX)(EA + 4);
> +    ret = (uint64_t)glue(ldul, MEMSUFFIX)(EA) << 32;
> +    ret |= (uint64_t)glue(ldul, MEMSUFFIX)(EA + 4);
>      return ret;
>  }
>  PPC_SPE_LD_OP(dw, spe_ldw);
> @@ -1184,16 +1055,16 @@ PPC_SPE_ST_OP(dw, spe_stdw);
>  static always_inline uint64_t glue(spe_ldw_le, MEMSUFFIX) (target_ulong EA)
>  {
>      uint64_t ret;
> -    ret = (uint64_t)glue(ld32r, MEMSUFFIX)(EA) << 32;
> -    ret |= (uint64_t)glue(ld32r, MEMSUFFIX)(EA + 4);
> +    ret = (uint64_t)glue(ldulr, MEMSUFFIX)(EA) << 32;
> +    ret |= (uint64_t)glue(ldulr, MEMSUFFIX)(EA + 4);
>      return ret;
>  }
>  PPC_SPE_LD_OP(dw_le, spe_ldw_le);
>  static always_inline void glue(spe_stdw_le, MEMSUFFIX) (target_ulong EA,
>                                                          uint64_t data)
>  {
> -    glue(st32r, MEMSUFFIX)(EA, data >> 32);
> -    glue(st32r, MEMSUFFIX)(EA + 4, data);
> +    glue(stlr, MEMSUFFIX)(EA, data >> 32);
> +    glue(stlr, MEMSUFFIX)(EA + 4, data);
>  }
>  PPC_SPE_ST_OP(dw_le, spe_stdw_le);
>  static always_inline uint64_t glue(spe_ldh, MEMSUFFIX) (target_ulong EA)
> @@ -1218,20 +1089,20 @@ PPC_SPE_ST_OP(dh, spe_stdh);
>  static always_inline uint64_t glue(spe_ldh_le, MEMSUFFIX) (target_ulong EA)
>  {
>      uint64_t ret;
> -    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 48;
> -    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2) << 32;
> -    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 4) << 16;
> -    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 6);
> +    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 48;
> +    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2) << 32;
> +    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 4) << 16;
> +    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 6);
>      return ret;
>  }
>  PPC_SPE_LD_OP(dh_le, spe_ldh_le);
>  static always_inline void glue(spe_stdh_le, MEMSUFFIX) (target_ulong EA,
>                                                          uint64_t data)
>  {
> -    glue(st16r, MEMSUFFIX)(EA, data >> 48);
> -    glue(st16r, MEMSUFFIX)(EA + 2, data >> 32);
> -    glue(st16r, MEMSUFFIX)(EA + 4, data >> 16);
> -    glue(st16r, MEMSUFFIX)(EA + 6, data);
> +    glue(stwr, MEMSUFFIX)(EA, data >> 48);
> +    glue(stwr, MEMSUFFIX)(EA + 2, data >> 32);
> +    glue(stwr, MEMSUFFIX)(EA + 4, data >> 16);
> +    glue(stwr, MEMSUFFIX)(EA + 6, data);
>  }
>  PPC_SPE_ST_OP(dh_le, spe_stdh_le);
>  static always_inline uint64_t glue(spe_lwhe, MEMSUFFIX) (target_ulong EA)
> @@ -1252,16 +1123,16 @@ PPC_SPE_ST_OP(whe, spe_stwhe);
>  static always_inline uint64_t glue(spe_lwhe_le, MEMSUFFIX) (target_ulong EA)
>  {
>      uint64_t ret;
> -    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 48;
> -    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2) << 16;
> +    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 48;
> +    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2) << 16;
>      return ret;
>  }
>  PPC_SPE_LD_OP(whe_le, spe_lwhe_le);
>  static always_inline void glue(spe_stwhe_le, MEMSUFFIX) (target_ulong EA,
>                                                           uint64_t data)
>  {
> -    glue(st16r, MEMSUFFIX)(EA, data >> 48);
> -    glue(st16r, MEMSUFFIX)(EA + 2, data >> 16);
> +    glue(stwr, MEMSUFFIX)(EA, data >> 48);
> +    glue(stwr, MEMSUFFIX)(EA + 2, data >> 16);
>  }
>  PPC_SPE_ST_OP(whe_le, spe_stwhe_le);
>  static always_inline uint64_t glue(spe_lwhou, MEMSUFFIX) (target_ulong EA)
> @@ -1290,24 +1161,24 @@ PPC_SPE_ST_OP(who, spe_stwho);
>  static always_inline uint64_t glue(spe_lwhou_le, MEMSUFFIX) (target_ulong EA)
>  {
>      uint64_t ret;
> -    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 32;
> -    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2);
> +    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 32;
> +    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2);
>      return ret;
>  }
>  PPC_SPE_LD_OP(whou_le, spe_lwhou_le);
>  static always_inline uint64_t glue(spe_lwhos_le, MEMSUFFIX) (target_ulong EA)
>  {
>      uint64_t ret;
> -    ret = ((uint64_t)((int32_t)glue(ld16rs, MEMSUFFIX)(EA))) << 32;
> -    ret |= (uint64_t)((int32_t)glue(ld16rs, MEMSUFFIX)(EA + 2));
> +    ret = ((uint64_t)((int32_t)glue(ldswr, MEMSUFFIX)(EA))) << 32;
> +    ret |= (uint64_t)((int32_t)glue(ldswr, MEMSUFFIX)(EA + 2));
>      return ret;
>  }
>  PPC_SPE_LD_OP(whos_le, spe_lwhos_le);
>  static always_inline void glue(spe_stwho_le, MEMSUFFIX) (target_ulong EA,
>                                                           uint64_t data)
>  {
> -    glue(st16r, MEMSUFFIX)(EA, data >> 32);
> -    glue(st16r, MEMSUFFIX)(EA + 2, data);
> +    glue(stwr, MEMSUFFIX)(EA, data >> 32);
> +    glue(stwr, MEMSUFFIX)(EA + 2, data);
>  }
>  PPC_SPE_ST_OP(who_le, spe_stwho_le);
>  #if !defined(TARGET_PPC64)
> @@ -1320,7 +1191,7 @@ PPC_SPE_ST_OP(wwo, spe_stwwo);
>  static always_inline void glue(spe_stwwo_le, MEMSUFFIX) (target_ulong EA,
>                                                           uint64_t data)
>  {
> -    glue(st32r, MEMSUFFIX)(EA, data);
> +    glue(stlr, MEMSUFFIX)(EA, data);
>  }
>  PPC_SPE_ST_OP(wwo_le, spe_stwwo_le);
>  #endif
> @@ -1334,14 +1205,14 @@ PPC_SPE_LD_OP(h, spe_lh);
>  static always_inline uint64_t glue(spe_lh_le, MEMSUFFIX) (target_ulong EA)
>  {
>      uint16_t tmp;
> -    tmp = glue(ld16r, MEMSUFFIX)(EA);
> +    tmp = glue(lduwr, MEMSUFFIX)(EA);
>      return ((uint64_t)tmp << 48) | ((uint64_t)tmp << 16);
>  }
>  PPC_SPE_LD_OP(h_le, spe_lh_le);
>  static always_inline uint64_t glue(spe_lwwsplat, MEMSUFFIX) (target_ulong EA)
>  {
>      uint32_t tmp;
> -    tmp = glue(ldl, MEMSUFFIX)(EA);
> +    tmp = glue(ldul, MEMSUFFIX)(EA);
>      return ((uint64_t)tmp << 32) | (uint64_t)tmp;
>  }
>  PPC_SPE_LD_OP(wwsplat, spe_lwwsplat);
> @@ -1349,7 +1220,7 @@ static always_inline
>  uint64_t glue(spe_lwwsplat_le, MEMSUFFIX) (target_ulong EA)
>  {
>      uint32_t tmp;
> -    tmp = glue(ld32r, MEMSUFFIX)(EA);
> +    tmp = glue(ldulr, MEMSUFFIX)(EA);
>      return ((uint64_t)tmp << 32) | (uint64_t)tmp;
>  }
>  PPC_SPE_LD_OP(wwsplat_le, spe_lwwsplat_le);
> @@ -1369,9 +1240,9 @@ uint64_t glue(spe_lwhsplat_le, MEMSUFFIX
>  {
>      uint64_t ret;
>      uint16_t tmp;
> -    tmp = glue(ld16r, MEMSUFFIX)(EA);
> +    tmp = glue(lduwr, MEMSUFFIX)(EA);
>      ret = ((uint64_t)tmp << 48) | ((uint64_t)tmp << 32);
> -    tmp = glue(ld16r, MEMSUFFIX)(EA + 2);
> +    tmp = glue(lduwr, MEMSUFFIX)(EA + 2);
>      ret |= ((uint64_t)tmp << 16) | (uint64_t)tmp;
>      return ret;
>  }
> Index: target-ppc/translate.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-ppc/translate.c,v
> retrieving revision 1.93
> diff -u -d -d -p -r1.93 translate.c
> --- target-ppc/translate.c	14 Oct 2007 07:07:07 -0000	1.93
> +++ target-ppc/translate.c	16 Oct 2007 11:39:08 -0000
> @@ -6756,7 +6756,7 @@ static always_inline int gen_intermediat
>                      ctx.nip, 1 - msr_pr, msr_ir);
>          }
>  #endif
> -        ctx.opcode = ldl_code(ctx.nip);
> +        ctx.opcode = ldul_code(ctx.nip);
>          if (msr_le) {
>              ctx.opcode = ((ctx.opcode & 0xFF000000) >> 24) |
>                  ((ctx.opcode & 0x00FF0000) >> 8) |
> Index: target-sh4/exec.h
> ===================================================================
> RCS file: /sources/qemu/qemu/target-sh4/exec.h,v
> retrieving revision 1.6
> diff -u -d -d -p -r1.6 exec.h
> --- target-sh4/exec.h	14 Oct 2007 07:07:08 -0000	1.6
> +++ target-sh4/exec.h	16 Oct 2007 11:39:08 -0000
> @@ -48,6 +48,9 @@ static inline int cpu_halted(CPUState *e
>  
>  #ifndef CONFIG_USER_ONLY
>  #include "softmmu_exec.h"
> +#define REVERSE_ENDIAN
> +#include "softmmu_exec.h"
> +#undef REVERSE_ENDIAN
>  #endif
>  
>  #define RETURN() __asm__ __volatile__("")
> Index: target-sh4/op_helper.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-sh4/op_helper.c,v
> retrieving revision 1.5
> diff -u -d -d -p -r1.5 op_helper.c
> --- target-sh4/op_helper.c	14 Oct 2007 07:07:08 -0000	1.5
> +++ target-sh4/op_helper.c	16 Oct 2007 11:39:08 -0000
> @@ -30,6 +30,21 @@ void do_raise_exception(void)
>  #define MMUSUFFIX _mmu
>  #define GETPC() (__builtin_return_address(0))
>  
> +/* Native-endian */
> +#define SHIFT 0
> +#include "softmmu_template.h"
> +
> +#define SHIFT 1
> +#include "softmmu_template.h"
> +
> +#define SHIFT 2
> +#include "softmmu_template.h"
> +
> +#define SHIFT 3
> +#include "softmmu_template.h"
> +
> +/* Reverse-endian */
> +#define REVERSE_ENDIAN
>  #define SHIFT 0
>  #include "softmmu_template.h"
>  
> @@ -41,6 +56,7 @@ void do_raise_exception(void)
>  
>  #define SHIFT 3
>  #include "softmmu_template.h"
> +#undef REVERSE_ENDIAN
>  
>  void tlb_fill(target_ulong addr, int is_write, int mmu_idx, void *retaddr)
>  {
> Index: target-sh4/op_mem.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-sh4/op_mem.c,v
> retrieving revision 1.3
> diff -u -d -d -p -r1.3 op_mem.c
> --- target-sh4/op_mem.c	16 Sep 2007 21:08:05 -0000	1.3
> +++ target-sh4/op_mem.c	16 Oct 2007 11:39:08 -0000
> @@ -48,7 +48,7 @@ void glue(op_stw_T0_T1, MEMSUFFIX) (void
>  }
>  
>  void glue(op_ldl_T0_T0, MEMSUFFIX) (void) {
> -    T0 = glue(ldl, MEMSUFFIX) (T0);
> +    T0 = glue(ldul, MEMSUFFIX) (T0);
>      RETURN();
>  }
>  
> Index: target-sparc/exec.h
> ===================================================================
> RCS file: /sources/qemu/qemu/target-sparc/exec.h,v
> retrieving revision 1.22
> diff -u -d -d -p -r1.22 exec.h
> --- target-sparc/exec.h	14 Oct 2007 07:07:08 -0000	1.22
> +++ target-sparc/exec.h	16 Oct 2007 11:39:08 -0000
> @@ -100,6 +100,9 @@ void do_rdpsr();
>  /* XXX: move that to a generic header */
>  #if !defined(CONFIG_USER_ONLY)
>  #include "softmmu_exec.h"
> +#define REVERSE_ENDIAN
> +#include "softmmu_exec.h"
> +#undef REVERSE_ENDIAN
>  #endif /* !defined(CONFIG_USER_ONLY) */
>  
>  static inline void env_to_regs(void)
> Index: target-sparc/helper.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-sparc/helper.c,v
> retrieving revision 1.28
> diff -u -d -d -p -r1.28 helper.c
> --- target-sparc/helper.c	14 Oct 2007 07:07:08 -0000	1.28
> +++ target-sparc/helper.c	16 Oct 2007 11:39:08 -0000
> @@ -130,7 +130,7 @@ int get_physical_address (CPUState *env,
>      /* SPARC reference MMU table walk: Context table->L1->L2->PTE */
>      /* Context base + context number */
>      pde_ptr = (env->mmuregs[1] << 4) + (env->mmuregs[2] << 2);
> -    pde = ldl_phys(pde_ptr);
> +    pde = ldul_phys(pde_ptr);
>  
>      /* Ctx pde */
>      switch (pde & PTE_ENTRYTYPE_MASK) {
> @@ -142,7 +142,7 @@ int get_physical_address (CPUState *env,
>          return 4 << 2;
>      case 1: /* L0 PDE */
>          pde_ptr = ((address >> 22) & ~3) + ((pde & ~3) << 4);
> -        pde = ldl_phys(pde_ptr);
> +        pde = ldul_phys(pde_ptr);
>  
>          switch (pde & PTE_ENTRYTYPE_MASK) {
>          default:
> @@ -152,7 +152,7 @@ int get_physical_address (CPUState *env,
>              return (1 << 8) | (4 << 2);
>          case 1: /* L1 PDE */
>              pde_ptr = ((address & 0xfc0000) >> 16) + ((pde & ~3) << 4);
> -            pde = ldl_phys(pde_ptr);
> +            pde = ldul_phys(pde_ptr);
>  
>              switch (pde & PTE_ENTRYTYPE_MASK) {
>              default:
> @@ -162,7 +162,7 @@ int get_physical_address (CPUState *env,
>                  return (2 << 8) | (4 << 2);
>              case 1: /* L2 PDE */
>                  pde_ptr = ((address & 0x3f000) >> 10) + ((pde & ~3) << 4);
> -                pde = ldl_phys(pde_ptr);
> +                pde = ldul_phys(pde_ptr);
>  
>                  switch (pde & PTE_ENTRYTYPE_MASK) {
>                  default:
> @@ -266,7 +266,7 @@ target_ulong mmu_probe(CPUState *env, ta
>      /* Context base + context number */
>      pde_ptr = (target_phys_addr_t)(env->mmuregs[1] << 4) +
>          (env->mmuregs[2] << 2);
> -    pde = ldl_phys(pde_ptr);
> +    pde = ldul_phys(pde_ptr);
>  
>      switch (pde & PTE_ENTRYTYPE_MASK) {
>      default:
> @@ -278,7 +278,7 @@ target_ulong mmu_probe(CPUState *env, ta
>          if (mmulev == 3)
>              return pde;
>          pde_ptr = ((address >> 22) & ~3) + ((pde & ~3) << 4);
> -        pde = ldl_phys(pde_ptr);
> +        pde = ldul_phys(pde_ptr);
>  
>          switch (pde & PTE_ENTRYTYPE_MASK) {
>          default:
> @@ -291,7 +291,7 @@ target_ulong mmu_probe(CPUState *env, ta
>              if (mmulev == 2)
>                  return pde;
>              pde_ptr = ((address & 0xfc0000) >> 16) + ((pde & ~3) << 4);
> -            pde = ldl_phys(pde_ptr);
> +            pde = ldul_phys(pde_ptr);
>  
>              switch (pde & PTE_ENTRYTYPE_MASK) {
>              default:
> @@ -304,7 +304,7 @@ target_ulong mmu_probe(CPUState *env, ta
>                  if (mmulev == 1)
>                      return pde;
>                  pde_ptr = ((address & 0x3f000) >> 10) + ((pde & ~3) << 4);
> -                pde = ldl_phys(pde_ptr);
> +                pde = ldul_phys(pde_ptr);
>  
>                  switch (pde & PTE_ENTRYTYPE_MASK) {
>                  default:
> @@ -331,7 +331,7 @@ void dump_mmu(CPUState *env)
>  
>      printf("MMU dump:\n");
>      pde_ptr = (env->mmuregs[1] << 4) + (env->mmuregs[2] << 2);
> -    pde = ldl_phys(pde_ptr);
> +    pde = ldul_phys(pde_ptr);
>      printf("Root ptr: " TARGET_FMT_plx ", ctx: %d\n",
>             (target_phys_addr_t)env->mmuregs[1] << 4, env->mmuregs[2]);
>      for (n = 0, va = 0; n < 256; n++, va += 16 * 1024 * 1024) {
> Index: target-sparc/op_helper.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-sparc/op_helper.c,v
> retrieving revision 1.42
> diff -u -d -d -p -r1.42 op_helper.c
> --- target-sparc/op_helper.c	14 Oct 2007 07:07:08 -0000	1.42
> +++ target-sparc/op_helper.c	16 Oct 2007 11:39:08 -0000
> @@ -184,11 +184,11 @@ void helper_ld_asi(int asi, int size, in
>              break;
>          default:
>          case 4:
> -            ret = ldl_code(T0 & ~3);
> +            ret = ldul_code(T0 & ~3);
>              break;
>          case 8:
> -            ret = ldl_code(T0 & ~3);
> -            T0 = ldl_code((T0 + 4) & ~3);
> +            ret = ldul_code(T0 & ~3);
> +            T0 = ldul_code((T0 + 4) & ~3);
>              break;
>          }
>          break;
> @@ -202,11 +202,11 @@ void helper_ld_asi(int asi, int size, in
>              break;
>          default:
>          case 4:
> -            ret = ldl_user(T0 & ~3);
> +            ret = ldul_user(T0 & ~3);
>              break;
>          case 8:
> -            ret = ldl_user(T0 & ~3);
> -            T0 = ldl_user((T0 + 4) & ~3);
> +            ret = ldul_user(T0 & ~3);
> +            T0 = ldul_user((T0 + 4) & ~3);
>              break;
>          }
>          break;
> @@ -220,11 +220,11 @@ void helper_ld_asi(int asi, int size, in
>              break;
>          default:
>          case 4:
> -            ret = ldl_kernel(T0 & ~3);
> +            ret = ldul_kernel(T0 & ~3);
>              break;
>          case 8:
> -            ret = ldl_kernel(T0 & ~3);
> -            T0 = ldl_kernel((T0 + 4) & ~3);
> +            ret = ldul_kernel(T0 & ~3);
> +            T0 = ldul_kernel((T0 + 4) & ~3);
>              break;
>          }
>          break;
> @@ -243,11 +243,11 @@ void helper_ld_asi(int asi, int size, in
>              break;
>          default:
>          case 4:
> -            ret = ldl_phys(T0 & ~3);
> +            ret = ldul_phys(T0 & ~3);
>              break;
>          case 8:
> -            ret = ldl_phys(T0 & ~3);
> -            T0 = ldl_phys((T0 + 4) & ~3);
> +            ret = ldul_phys(T0 & ~3);
> +            T0 = ldul_phys((T0 + 4) & ~3);
>              break;
>          }
>          break;
> @@ -264,13 +264,13 @@ void helper_ld_asi(int asi, int size, in
>              break;
>          default:
>          case 4:
> -            ret = ldl_phys((target_phys_addr_t)(T0 & ~3)
> -                           | ((target_phys_addr_t)(asi & 0xf) << 32));
> +            ret = ldul_phys((target_phys_addr_t)(T0 & ~3)
> +                            | ((target_phys_addr_t)(asi & 0xf) << 32));
>              break;
>          case 8:
> -            ret = ldl_phys((target_phys_addr_t)(T0 & ~3)
> -                           | ((target_phys_addr_t)(asi & 0xf) << 32));
> -            T0 = ldl_phys((target_phys_addr_t)((T0 + 4) & ~3)
> +            ret = ldul_phys((target_phys_addr_t)(T0 & ~3)
> +                            | ((target_phys_addr_t)(asi & 0xf) << 32));
> +            T0 = ldul_phys((target_phys_addr_t)((T0 + 4) & ~3)
>                             | ((target_phys_addr_t)(asi & 0xf) << 32));
>              break;
>          }
> @@ -422,7 +422,7 @@ void helper_st_asi(int asi, int size)
>              uint32_t src = T1 & ~3, dst = T0 & ~3, temp;
>  
>              for (i = 0; i < 32; i += 4, src += 4, dst += 4) {
> -                temp = ldl_kernel(src);
> +                temp = ldul_kernel(src);
>                  stl_kernel(dst, temp);
>              }
>          }
> @@ -514,6 +514,24 @@ void helper_ld_asi(int asi, int size, in
>      switch (asi) {
>      case 0x80: // Primary
>      case 0x82: // Primary no-fault
> +        {
> +            switch(size) {
> +            case 1:
> +                ret = ldub_raw(T0);
> +                break;
> +            case 2:
> +                ret = lduw_raw(T0 & ~1);
> +                break;
> +            case 4:
> +                ret = ldul_raw(T0 & ~3);
> +                break;
> +            default:
> +            case 8:
> +                ret = lduq_raw(T0 & ~7);
> +                break;
> +            }
> +        }
> +        break;
>      case 0x88: // Primary LE
>      case 0x8a: // Primary no-fault LE
>          {
> @@ -522,14 +540,14 @@ void helper_ld_asi(int asi, int size, in
>                  ret = ldub_raw(T0);
>                  break;
>              case 2:
> -                ret = lduw_raw(T0 & ~1);
> +                ret = lduwr_raw(T0 & ~1);
>                  break;
>              case 4:
> -                ret = ldl_raw(T0 & ~3);
> +                ret = ldulr_raw(T0 & ~3);
>                  break;
>              default:
>              case 8:
> -                ret = ldq_raw(T0 & ~7);
> +                ret = lduqr_raw(T0 & ~7);
>                  break;
>              }
>          }
> @@ -544,29 +562,6 @@ void helper_ld_asi(int asi, int size, in
>          break;
>      }
>  
> -    /* Convert from little endian */
> -    switch (asi) {
> -    case 0x88: // Primary LE
> -    case 0x89: // Secondary LE
> -    case 0x8a: // Primary no-fault LE
> -    case 0x8b: // Secondary no-fault LE
> -        switch(size) {
> -        case 2:
> -            ret = bswap16(ret);
> -            break;
> -        case 4:
> -            ret = bswap32(ret);
> -            break;
> -        case 8:
> -            ret = bswap64(ret);
> -            break;
> -        default:
> -            break;
> -        }
> -    default:
> -        break;
> -    }
> -
>      /* Convert to signed number */
>      if (sign) {
>          switch(size) {
> @@ -591,30 +586,8 @@ void helper_st_asi(int asi, int size)
>      if (asi < 0x80)
>          raise_exception(TT_PRIV_ACT);
>  
> -    /* Convert to little endian */
> -    switch (asi) {
> -    case 0x88: // Primary LE
> -    case 0x89: // Secondary LE
> -        switch(size) {
> -        case 2:
> -            T0 = bswap16(T0);
> -            break;
> -        case 4:
> -            T0 = bswap32(T0);
> -            break;
> -        case 8:
> -            T0 = bswap64(T0);
> -            break;
> -        default:
> -            break;
> -        }
> -    default:
> -        break;
> -    }
> -
>      switch(asi) {
>      case 0x80: // Primary
> -    case 0x88: // Primary LE
>          {
>              switch(size) {
>              case 1:
> @@ -633,6 +606,25 @@ void helper_st_asi(int asi, int size)
>              }
>          }
>          break;
> +    case 0x88: // Primary LE
> +        {
> +            switch(size) {
> +            case 1:
> +                stb_raw(T0, T1);
> +                break;
> +            case 2:
> +                stwr_raw(T0 & ~1, T1);
> +                break;
> +            case 4:
> +                stlr_raw(T0 & ~3, T1);
> +                break;
> +            case 8:
> +            default:
> +                stqr_raw(T0 & ~7, T1);
> +                break;
> +            }
> +        }
> +        break;
>      case 0x81: // Secondary
>      case 0x89: // Secondary LE
>          // XXX
> @@ -659,11 +651,8 @@ void helper_ld_asi(int asi, int size, in
>  
>      switch (asi) {
>      case 0x10: // As if user primary
> -    case 0x18: // As if user primary LE
>      case 0x80: // Primary
>      case 0x82: // Primary no-fault
> -    case 0x88: // Primary LE
> -    case 0x8a: // Primary no-fault LE
>          if ((asi & 0x80) && (env->pstate & PS_PRIV)) {
>              switch(size) {
>              case 1:
> @@ -673,11 +662,11 @@ void helper_ld_asi(int asi, int size, in
>                  ret = lduw_kernel(T0 & ~1);
>                  break;
>              case 4:
> -                ret = ldl_kernel(T0 & ~3);
> +                ret = ldul_kernel(T0 & ~3);
>                  break;
>              default:
>              case 8:
> -                ret = ldq_kernel(T0 & ~7);
> +                ret = lduq_kernel(T0 & ~7);
>                  break;
>              }
>          } else {
> @@ -689,17 +678,90 @@ void helper_ld_asi(int asi, int size, in
>                  ret = lduw_user(T0 & ~1);
>                  break;
>              case 4:
> -                ret = ldl_user(T0 & ~3);
> +                ret = ldul_user(T0 & ~3);
>                  break;
>              default:
>              case 8:
> -                ret = ldq_user(T0 & ~7);
> +                ret = lduq_user(T0 & ~7);
> +                break;
> +            }
> +        }
> +        break;
> +    case 0x18: // As if user primary LE
> +    case 0x88: // Primary LE
> +    case 0x8a: // Primary no-fault LE
> +        if ((asi & 0x80) && (env->pstate & PS_PRIV)) {
> +            if (env->hpstate & HS_PRIV) {
> +                switch(size) {
> +                case 1:
> +                    ret = ldub_hypv(T0);
> +                    break;
> +                case 2:
> +                    ret = lduwr_hypv(T0 & ~1);
> +                    break;
> +                case 4:
> +                    ret = ldulr_hypv(T0 & ~3);
> +                    break;
> +                default:
> +                case 8:
> +                    ret = lduqr_hypv(T0 & ~7);
> +                    break;
> +                }
> +            } else {
> +                switch(size) {
> +                case 1:
> +                    ret = ldub_kernel(T0);
> +                    break;
> +                case 2:
> +                    ret = lduwr_kernel(T0 & ~1);
> +                    break;
> +                case 4:
> +                    ret = ldulr_kernel(T0 & ~3);
> +                    break;
> +                default:
> +                case 8:
> +                    ret = lduqr_kernel(T0 & ~7);
> +                    break;
> +                }
> +            }
> +        } else {
> +            switch(size) {
> +            case 1:
> +                ret = ldub_user(T0);
> +                break;
> +            case 2:
> +                ret = lduwr_user(T0 & ~1);
> +                break;
> +            case 4:
> +                ret = ldulr_user(T0 & ~3);
> +                break;
> +            default:
> +            case 8:
> +                ret = lduqr_user(T0 & ~7);
>                  break;
>              }
>          }
>          break;
>      case 0x14: // Bypass
>      case 0x15: // Bypass, non-cacheable
> +        {
> +            switch(size) {
> +            case 1:
> +                ret = ldub_phys(T0);
> +                break;
> +            case 2:
> +                ret = lduw_phys(T0 & ~1);
> +                break;
> +            case 4:
> +                ret = ldul_phys(T0 & ~3);
> +                break;
> +            default:
> +            case 8:
> +                ret = lduq_phys(T0 & ~7);
> +                break;
> +            }
> +            break;
> +        }
>      case 0x1c: // Bypass LE
>      case 0x1d: // Bypass, non-cacheable LE
>          {
> @@ -708,14 +770,14 @@ void helper_ld_asi(int asi, int size, in
>                  ret = ldub_phys(T0);
>                  break;
>              case 2:
> -                ret = lduw_phys(T0 & ~1);
> +                ret = bswap16(lduw_phys(T0 & ~1));
>                  break;
>              case 4:
> -                ret = ldl_phys(T0 & ~3);
> +                ret = bswap32(ldul_phys(T0 & ~3));
>                  break;
>              default:
>              case 8:
> -                ret = ldq_phys(T0 & ~7);
> +                ret = bswap64(lduq_phys(T0 & ~7));
>                  break;
>              }
>              break;
> @@ -803,34 +865,6 @@ void helper_ld_asi(int asi, int size, in
>          break;
>      }
>  
> -    /* Convert from little endian */
> -    switch (asi) {
> -    case 0x0c: // Nucleus Little Endian (LE)
> -    case 0x18: // As if user primary LE
> -    case 0x19: // As if user secondary LE
> -    case 0x1c: // Bypass LE
> -    case 0x1d: // Bypass, non-cacheable LE
> -    case 0x88: // Primary LE
> -    case 0x89: // Secondary LE
> -    case 0x8a: // Primary no-fault LE
> -    case 0x8b: // Secondary no-fault LE
> -        switch(size) {
> -        case 2:
> -            ret = bswap16(ret);
> -            break;
> -        case 4:
> -            ret = bswap32(ret);
> -            break;
> -        case 8:
> -            ret = bswap64(ret);
> -            break;
> -        default:
> -            break;
> -        }
> -    default:
> -        break;
> -    }
> -
>      /* Convert to signed number */
>      if (sign) {
>          switch(size) {
> @@ -855,37 +889,9 @@ void helper_st_asi(int asi, int size)
>      if (asi < 0x80 && (env->pstate & PS_PRIV) == 0)
>          raise_exception(TT_PRIV_ACT);
>  
> -    /* Convert to little endian */
> -    switch (asi) {
> -    case 0x0c: // Nucleus Little Endian (LE)
> -    case 0x18: // As if user primary LE
> -    case 0x19: // As if user secondary LE
> -    case 0x1c: // Bypass LE
> -    case 0x1d: // Bypass, non-cacheable LE
> -    case 0x88: // Primary LE
> -    case 0x89: // Secondary LE
> -        switch(size) {
> -        case 2:
> -            T0 = bswap16(T0);
> -            break;
> -        case 4:
> -            T0 = bswap32(T0);
> -            break;
> -        case 8:
> -            T0 = bswap64(T0);
> -            break;
> -        default:
> -            break;
> -        }
> -    default:
> -        break;
> -    }
> -
>      switch(asi) {
>      case 0x10: // As if user primary
> -    case 0x18: // As if user primary LE
>      case 0x80: // Primary
> -    case 0x88: // Primary LE
>          if ((asi & 0x80) && (env->pstate & PS_PRIV)) {
>              switch(size) {
>              case 1:
> @@ -920,10 +926,62 @@ void helper_st_asi(int asi, int size)
>              }
>          }
>          break;
> +    case 0x18: // As if user primary LE
> +    case 0x88: // Primary LE
> +        if ((asi & 0x80) && (env->pstate & PS_PRIV)) {
> +            if (env->hpstate & HS_PRIV) {
> +                switch(size) {
> +                case 1:
> +                    stb_hypv(T0, T1);
> +                    break;
> +                case 2:
> +                    stwr_hypv(T0 & ~1, T1);
> +                    break;
> +                case 4:
> +                    stlr_hypv(T0 & ~3, T1);
> +                    break;
> +                case 8:
> +                default:
> +                    stqr_hypv(T0 & ~7, T1);
> +                    break;
> +                }
> +            } else {
> +                switch(size) {
> +                case 1:
> +                    stb_kernel(T0, T1);
> +                    break;
> +                case 2:
> +                    stwr_kernel(T0 & ~1, T1);
> +                    break;
> +                case 4:
> +                    stlr_kernel(T0 & ~3, T1);
> +                    break;
> +                case 8:
> +                default:
> +                    stqr_kernel(T0 & ~7, T1);
> +                    break;
> +                }
> +            }
> +        } else {
> +            switch(size) {
> +            case 1:
> +                stb_user(T0, T1);
> +                break;
> +            case 2:
> +                stwr_user(T0 & ~1, T1);
> +                break;
> +            case 4:
> +                stlr_user(T0 & ~3, T1);
> +                break;
> +            case 8:
> +            default:
> +                stqr_user(T0 & ~7, T1);
> +                break;
> +            }
> +        }
> +        break;
>      case 0x14: // Bypass
>      case 0x15: // Bypass, non-cacheable
> -    case 0x1c: // Bypass LE
> -    case 0x1d: // Bypass, non-cacheable LE
>          {
>              switch(size) {
>              case 1:
> @@ -942,6 +1000,26 @@ void helper_st_asi(int asi, int size)
>              }
>          }
>          return;
> +    case 0x1c: // Bypass LE
> +    case 0x1d: // Bypass, non-cacheable LE
> +        {
> +            switch(size) {
> +            case 1:
> +                stb_phys(T0, T1);
> +                break;
> +            case 2:
> +                stw_phys(T0 & ~1, bswap16(T1));
> +                break;
> +            case 4:
> +                stl_phys(T0 & ~3, bswap32(T1));
> +                break;
> +            case 8:
> +            default:
> +                stq_phys(T0 & ~7, bswap64(T1));
> +                break;
> +            }
> +        }
> +        return;
>      case 0x04: // Nucleus
>      case 0x0c: // Nucleus Little Endian (LE)
>      case 0x11: // As if user secondary
> @@ -1497,6 +1575,21 @@ static void do_unaligned_access(target_u
>  #define ALIGNED_ONLY
>  #define GETPC() (__builtin_return_address(0))
>  
> +/* Native-endian */
> +#define SHIFT 0
> +#include "softmmu_template.h"
> +
> +#define SHIFT 1
> +#include "softmmu_template.h"
> +
> +#define SHIFT 2
> +#include "softmmu_template.h"
> +
> +#define SHIFT 3
> +#include "softmmu_template.h"
> +
> +/* Reverse-endian */
> +#define REVERSE_ENDIAN
>  #define SHIFT 0
>  #include "softmmu_template.h"
>  
> @@ -1508,6 +1601,7 @@ static void do_unaligned_access(target_u
>  
>  #define SHIFT 3
>  #include "softmmu_template.h"
> +#undef REVERSE_ENDIAN
>  
>  static void do_unaligned_access(target_ulong addr, int is_write, int is_user,
>                                  void *retaddr)
> Index: target-sparc/op_mem.h
> ===================================================================
> RCS file: /sources/qemu/qemu/target-sparc/op_mem.h,v
> retrieving revision 1.10
> diff -u -d -d -p -r1.10 op_mem.h
> --- target-sparc/op_mem.h	21 Sep 2007 19:10:53 -0000	1.10
> +++ target-sparc/op_mem.h	16 Oct 2007 11:39:08 -0000
> @@ -17,7 +17,7 @@ void OPPROTO glue(glue(op_, name), MEMSU
>      glue(op, MEMSUFFIX)(T0, T1);                                      \
>  }
>  
> -SPARC_LD_OP(ld, ldl);
> +SPARC_LD_OP(ld, ldul);
>  SPARC_LD_OP(ldub, ldub);
>  SPARC_LD_OP(lduh, lduw);
>  SPARC_LD_OP_S(ldsb, ldsb);
> @@ -42,15 +42,15 @@ void OPPROTO glue(op_ldstub, MEMSUFFIX)(
>  
>  void OPPROTO glue(op_swap, MEMSUFFIX)(void)
>  {
> -    target_ulong tmp = glue(ldl, MEMSUFFIX)(T0);
> +    target_ulong tmp = glue(ldul, MEMSUFFIX)(T0);
>      glue(stl, MEMSUFFIX)(T0, T1);       /* XXX: Should be Atomically */
>      T1 = tmp;
>  }
>  
>  void OPPROTO glue(op_ldd, MEMSUFFIX)(void)
>  {
> -    T1 = glue(ldl, MEMSUFFIX)(T0);
> -    T0 = glue(ldl, MEMSUFFIX)((T0 + 4));
> +    T1 = glue(ldul, MEMSUFFIX)(T0);
> +    T0 = glue(ldul, MEMSUFFIX)((T0 + 4));
>  }
>  
>  /***                         Floating-point store                          ***/
> @@ -76,17 +76,9 @@ void OPPROTO glue(op_lddf, MEMSUFFIX) (v
>  }
>  
>  #ifdef TARGET_SPARC64
> -void OPPROTO glue(op_lduw, MEMSUFFIX)(void)
> -{
> -    T1 = (uint64_t)(glue(ldl, MEMSUFFIX)(T0) & 0xffffffff);
> -}
> -
> -void OPPROTO glue(op_ldsw, MEMSUFFIX)(void)
> -{
> -    T1 = (int64_t)(glue(ldl, MEMSUFFIX)(T0) & 0xffffffff);
> -}
> -
> -SPARC_LD_OP(ldx, ldq);
> +SPARC_LD_OP(lduw, ldul);
> +SPARC_LD_OP(ldsw, ldsl);
> +SPARC_LD_OP(ldx, lduq);
>  SPARC_ST_OP(stx, stq);
>  #endif
>  #undef MEMSUFFIX
> Index: target-sparc/translate.c
> ===================================================================
> RCS file: /sources/qemu/qemu/target-sparc/translate.c,v
> retrieving revision 1.75
> diff -u -d -d -p -r1.75 translate.c
> --- target-sparc/translate.c	14 Oct 2007 07:07:08 -0000	1.75
> +++ target-sparc/translate.c	16 Oct 2007 11:39:08 -0000
> @@ -1089,7 +1089,7 @@ static void disas_sparc_insn(DisasContex
>  {
>      unsigned int insn, opc, rs1, rs2, rd;
>  
> -    insn = ldl_code(dc->pc);
> +    insn = ldul_code(dc->pc);
>      opc = GET_FIELD(insn, 0, 1);
>  
>      rd = GET_FIELD(insn, 2, 6);
>   

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] RFC: reverse-endian softmmu memory accessors
  2007-10-15 17:45       ` Blue Swirl
@ 2007-10-16 20:27         ` J. Mayer
  2007-11-23 12:55           ` Tero Kaarlela
  0 siblings, 1 reply; 20+ messages in thread
From: J. Mayer @ 2007-10-16 20:27 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 3056 bytes --]

On Mon, 2007-10-15 at 20:45 +0300, Blue Swirl wrote:
> On 10/15/07, Blue Swirl <blauwirbel@gmail.com> wrote:
> > On 10/15/07, J. Mayer <l_indien@magic.fr> wrote:
> > > On Sun, 2007-10-14 at 15:59 +0300, Blue Swirl wrote:
> > > > On 10/14/07, J. Mayer <l_indien@magic.fr> wrote:
> > > > > Here's an updated version of the patch against current CVS.
> > > > > This patches provides reverse-endian, little-endian and big-endian
> > > > > memory accessors, available with and without softmmu. It also provides
> > > > > an IO_MEM_REVERSE TLB flag to allow future support of per-page
> > > > > endianness control, which is required by some targets CPU emulations.
> > > > > Having reverse-endian memory accessors also make it possible to optimise
> > > > > reverse-endian memory access when the target CPU has dedicated
> > > > > instructions. For now, it includes optimisations for the PowerPC target.
> > > >
> > > > This breaks Sparc32 softmmu, I get a black screen. Your changes to
> > > > target-sparc and hw/sun4m.c look fine, so the problem could be in IO?
> > >
> > > Did it worked before my commits ? I may have done something wrong during
> > > the merge...
> > > I will do more checks and more tests...
> >
> > If I disable the IOSWAP code, black screen is gone. I think this is
> > logical: the io accessors return host CPU values, therefore no byte
> > swapping need to be performed.
> >
> > The attached version works for me.
> 
> This patch takes the reverse endian functions into use for Sparc.
> 
> I added hypervisor versions of the functions. This is getting a bit
> ugly, time for #include magic? Physical versions could be useful too.

My first reaction is to say that it seems pointless to define hypervisor
access routines when emulating user-mode only. It seems more likely to
me that any attempt to do such an access in user-mode code would raise a
privilege exception. If such an access is really possible for any
reason, I think you're right: it might be time to have a template
header, used the same way softmmu_template.h currently is.
Then, I tried to go this way, adding the "nommu_header.h" and
"nommu_template.h" files.
I also completed the API, replacing ldq with lduq and adding ldsq
accessors.
Here's the updated patch, including the Sparc specific reverse-endian
changes.
I tested the PowerPC, PowerPC64 and i386 softmmu cases and the PowerPC
linux-user case. I also successfully launched the Sparc, ARM and Coldfire
test images available from the Qemu page. I did not succeed with the Mips
test case, but the problem occurs during initialisation, before the
emulation starts: it fails to load the initrd. It seems to me that the
problem comes from the fact that I'm running on a 64-bit host: it seems to
run OK when launched in 32-bit mode but not in 64-bit mode, using clean
CVS sources. I will check this and propose a fix, if needed.
There is still at least one bug in this patch: the reverse-endian I/O
case in cpu_physical_memory_rw (exec.c) is not handled and still needs
to be fixed.

-- 
J. Mayer <l_indien@magic.fr>
Never organized

[-- Attachment #2: softmmu_reverse_endian.diff --]
[-- Type: text/x-patch, Size: 214329 bytes --]

? pc-bios/mips_bios.bin
Index: cpu-all.h
===================================================================
RCS file: /sources/qemu/qemu/cpu-all.h,v
retrieving revision 1.76
diff -u -d -d -p -r1.76 cpu-all.h
--- cpu-all.h	23 Sep 2007 15:28:03 -0000	1.76
+++ cpu-all.h	16 Oct 2007 11:39:03 -0000
@@ -149,7 +149,7 @@ typedef union {
  *   f    : float access
  *
  * sign is:
- * (empty): for floats or 32 bit size
+ * (empty): for floats
  *   u    : unsigned
  *   s    : signed
  *
@@ -161,9 +161,9 @@ typedef union {
  *
  * endian is:
  * (empty): target cpu endianness or 8 bit access
- *   r    : reversed target cpu endianness (not implemented yet)
- *   be   : big endian (not implemented yet)
- *   le   : little endian (not implemented yet)
+ *   r    : reversed target cpu endianness
+ *   be   : big endian
+ *   le   : little endian
  *
  * access_type is:
  *   raw    : host memory access
@@ -215,24 +215,45 @@ static inline int ldsw_le_p(void *ptr)
 #endif
 }
 
-static inline int ldl_le_p(void *ptr)
+static inline target_long ldul_le_p(void *ptr)
 {
 #ifdef __powerpc__
     int val;
     __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr));
-    return val;
+    return (uint32_t)val;
 #else
     uint8_t *p = ptr;
     return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
 #endif
 }
 
-static inline uint64_t ldq_le_p(void *ptr)
+static inline target_long ldsl_le_p(void *ptr)
+{
+#ifdef __powerpc__
+    int val;
+    __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+    return (int32_t)val;
+#else
+    uint8_t *p = ptr;
+    return (int32_t)(p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24));
+#endif
+}
+
+static inline uint64_t lduq_le_p(void *ptr)
 {
     uint8_t *p = ptr;
     uint32_t v1, v2;
-    v1 = ldl_le_p(p);
-    v2 = ldl_le_p(p + 4);
+    v1 = ldul_le_p(p);
+    v2 = ldul_le_p(p + 4);
+    return v1 | ((uint64_t)v2 << 32);
+}
+
+static inline int64_t ldsq_le_p(void *ptr)
+{
+    uint8_t *p = ptr;
+    uint32_t v1, v2;
+    v1 = ldul_le_p(p);
+    v2 = ldul_le_p(p + 4);
     return v1 | ((uint64_t)v2 << 32);
 }
 
@@ -275,7 +296,7 @@ static inline float32 ldfl_le_p(void *pt
         float32 f;
         uint32_t i;
     } u;
-    u.i = ldl_le_p(ptr);
+    u.i = ldul_le_p(ptr);
     return u.f;
 }
 
@@ -292,8 +313,8 @@ static inline void stfl_le_p(void *ptr, 
 static inline float64 ldfq_le_p(void *ptr)
 {
     CPU_DoubleU u;
-    u.l.lower = ldl_le_p(ptr);
-    u.l.upper = ldl_le_p(ptr + 4);
+    u.l.lower = ldul_le_p(ptr);
+    u.l.upper = ldul_le_p(ptr + 4);
     return u.d;
 }
 
@@ -317,16 +338,26 @@ static inline int ldsw_le_p(void *ptr)
     return *(int16_t *)ptr;
 }
 
-static inline int ldl_le_p(void *ptr)
+static inline target_long ldul_le_p(void *ptr)
 {
     return *(uint32_t *)ptr;
 }
 
-static inline uint64_t ldq_le_p(void *ptr)
+static inline target_long ldsl_le_p(void *ptr)
+{
+    return *(int32_t *)ptr;
+}
+
+static inline uint64_t lduq_le_p(void *ptr)
 {
     return *(uint64_t *)ptr;
 }
 
+static inline int64_t ldsq_le_p(void *ptr)
+{
+    return *(int64_t *)ptr;
+}
+
 static inline void stw_le_p(void *ptr, int v)
 {
     *(uint16_t *)ptr = v;
@@ -397,7 +428,7 @@ static inline int ldsw_be_p(void *ptr)
 #endif
 }
 
-static inline int ldl_be_p(void *ptr)
+static inline target_long ldul_be_p(void *ptr)
 {
 #if defined(__i386__) || defined(__x86_64__)
     int val;
@@ -405,18 +436,41 @@ static inline int ldl_be_p(void *ptr)
                   "bswap %0\n"
                   : "=r" (val)
                   : "m" (*(uint32_t *)ptr));
-    return val;
+    return (uint32_t)val;
 #else
     uint8_t *b = (uint8_t *) ptr;
     return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
 #endif
 }
 
-static inline uint64_t ldq_be_p(void *ptr)
+static inline target_long ldsl_be_p(void *ptr)
+{
+#if defined(__i386__) || defined(__x86_64__)
+    int val;
+    asm volatile ("movl %1, %0\n"
+                  "bswap %0\n"
+                  : "=r" (val)
+                  : "m" (*(uint32_t *)ptr));
+    return (int32_t)val;
+#else
+    uint8_t *b = (uint8_t *) ptr;
+    return (int32_t)((b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]);
+#endif
+}
+
+static inline uint64_t lduq_be_p(void *ptr)
 {
     uint32_t a,b;
-    a = ldl_be_p(ptr);
-    b = ldl_be_p(ptr+4);
+    a = ldul_be_p(ptr);
+    b = ldul_be_p(ptr+4);
+    return (((uint64_t)a<<32)|b);
+}
+
+static inline int64_t ldsq_be_p(void *ptr)
+{
+    uint32_t a,b;
+    a = ldul_be_p(ptr);
+    b = ldul_be_p(ptr+4);
     return (((uint64_t)a<<32)|b);
 }
 
@@ -464,7 +518,7 @@ static inline float32 ldfl_be_p(void *pt
         float32 f;
         uint32_t i;
     } u;
-    u.i = ldl_be_p(ptr);
+    u.i = ldul_be_p(ptr);
     return u.f;
 }
 
@@ -481,8 +535,8 @@ static inline void stfl_be_p(void *ptr, 
 static inline float64 ldfq_be_p(void *ptr)
 {
     CPU_DoubleU u;
-    u.l.upper = ldl_be_p(ptr);
-    u.l.lower = ldl_be_p(ptr + 4);
+    u.l.upper = ldul_be_p(ptr);
+    u.l.lower = ldul_be_p(ptr + 4);
     return u.d;
 }
 
@@ -506,16 +560,26 @@ static inline int ldsw_be_p(void *ptr)
     return *(int16_t *)ptr;
 }
 
-static inline int ldl_be_p(void *ptr)
+static inline target_long ldul_be_p(void *ptr)
 {
     return *(uint32_t *)ptr;
 }
 
-static inline uint64_t ldq_be_p(void *ptr)
+static inline target_long ldsl_be_p(void *ptr)
+{
+    return *(int32_t *)ptr;
+}
+
+static inline uint64_t lduq_be_p(void *ptr)
 {
     return *(uint64_t *)ptr;
 }
 
+static inline int64_t ldsq_be_p(void *ptr)
+{
+    return *(int64_t *)ptr;
+}
+
 static inline void stw_be_p(void *ptr, int v)
 {
     *(uint16_t *)ptr = v;
@@ -557,10 +621,13 @@ static inline void stfq_be_p(void *ptr, 
 
 /* target CPU memory access functions */
 #if defined(TARGET_WORDS_BIGENDIAN)
+/* native-endian */
 #define lduw_p(p) lduw_be_p(p)
 #define ldsw_p(p) ldsw_be_p(p)
-#define ldl_p(p) ldl_be_p(p)
-#define ldq_p(p) ldq_be_p(p)
+#define ldul_p(p) ldul_be_p(p)
+#define ldsl_p(p) ldsl_be_p(p)
+#define lduq_p(p) lduq_be_p(p)
+#define ldsq_p(p) ldsq_be_p(p)
 #define ldfl_p(p) ldfl_be_p(p)
 #define ldfq_p(p) ldfq_be_p(p)
 #define stw_p(p, v) stw_be_p(p, v)
@@ -568,11 +635,28 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_p(p, v) stq_be_p(p, v)
 #define stfl_p(p, v) stfl_be_p(p, v)
 #define stfq_p(p, v) stfq_be_p(p, v)
+/* reverse-endian */
+#define lduwr_p(p) lduw_le_p(p)
+#define ldswr_p(p) ldsw_le_p(p)
+#define ldulr_p(p) ldul_le_p(p)
+#define ldslr_p(p) ldsl_le_p(p)
+#define lduqr_p(p) lduq_le_p(p)
+#define ldsqr_p(p) ldsq_le_p(p)
+#define ldflr_p(p) ldfl_le_p(p)
+#define ldfqr_p(p) ldfq_le_p(p)
+#define stwr_p(p, v) stw_le_p(p, v)
+#define stlr_p(p, v) stl_le_p(p, v)
+#define stqr_p(p, v) stq_le_p(p, v)
+#define stflr_p(p, v) stfl_le_p(p, v)
+#define stfqr_p(p, v) stfq_le_p(p, v)
 #else
+/* native-endian */
 #define lduw_p(p) lduw_le_p(p)
 #define ldsw_p(p) ldsw_le_p(p)
-#define ldl_p(p) ldl_le_p(p)
-#define ldq_p(p) ldq_le_p(p)
+#define ldul_p(p) ldul_le_p(p)
+#define ldsl_p(p) ldsl_le_p(p)
+#define lduq_p(p) lduq_le_p(p)
+#define ldsq_p(p) ldsq_le_p(p)
 #define ldfl_p(p) ldfl_le_p(p)
 #define ldfq_p(p) ldfq_le_p(p)
 #define stw_p(p, v) stw_le_p(p, v)
@@ -580,6 +664,20 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_p(p, v) stq_le_p(p, v)
 #define stfl_p(p, v) stfl_le_p(p, v)
 #define stfq_p(p, v) stfq_le_p(p, v)
+/* reverse-endian */
+#define lduwr_p(p) lduw_be_p(p)
+#define ldswr_p(p) ldsw_be_p(p)
+#define ldulr_p(p) ldul_be_p(p)
+#define ldslr_p(p) ldsl_be_p(p)
+#define lduqr_p(p) lduq_be_p(p)
+#define ldsqr_p(p) ldsq_be_p(p)
+#define ldflr_p(p) ldfl_be_p(p)
+#define ldfqr_p(p) ldfq_be_p(p)
+#define stwr_p(p, v) stw_be_p(p, v)
+#define stlr_p(p, v) stl_be_p(p, v)
+#define stqr_p(p, v) stq_be_p(p, v)
+#define stflr_p(p, v) stfl_be_p(p, v)
+#define stfqr_p(p, v) stfq_be_p(p, v)
 #endif
 
 /* MMU memory access macros */
@@ -605,12 +703,15 @@ static inline void stfq_be_p(void *ptr, 
 #define laddr(x) (uint8_t *)(long)(x)
 #endif
 
+/* native-endian */
 #define ldub_raw(p) ldub_p(laddr((p)))
 #define ldsb_raw(p) ldsb_p(laddr((p)))
 #define lduw_raw(p) lduw_p(laddr((p)))
 #define ldsw_raw(p) ldsw_p(laddr((p)))
-#define ldl_raw(p) ldl_p(laddr((p)))
-#define ldq_raw(p) ldq_p(laddr((p)))
+#define ldul_raw(p) ldul_p(laddr((p)))
+#define ldsl_raw(p) ldsl_p(laddr((p)))
+#define lduq_raw(p) lduq_p(laddr((p)))
+#define ldsq_raw(p) ldsq_p(laddr((p)))
 #define ldfl_raw(p) ldfl_p(laddr((p)))
 #define ldfq_raw(p) ldfq_p(laddr((p)))
 #define stb_raw(p, v) stb_p(saddr((p)), v)
@@ -619,47 +720,77 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_raw(p, v) stq_p(saddr((p)), v)
 #define stfl_raw(p, v) stfl_p(saddr((p)), v)
 #define stfq_raw(p, v) stfq_p(saddr((p)), v)
-
+/* reverse-endian */
+#define ldubr_raw(p) ldub_p(laddr((p)))
+#define ldsbr_raw(p) ldsb_p(laddr((p)))
+#define lduwr_raw(p) lduwr_p(laddr((p)))
+#define ldswr_raw(p) ldswr_p(laddr((p)))
+#define ldulr_raw(p) ldulr_p(laddr((p)))
+#define ldslr_raw(p) ldslr_p(laddr((p)))
+#define lduqr_raw(p) lduqr_p(laddr((p)))
+#define ldsqr_raw(p) ldsqr_p(laddr((p)))
+#define ldflr_raw(p) ldflr_p(laddr((p)))
+#define ldfqr_raw(p) ldfqr_p(laddr((p)))
+#define stbr_raw(p, v) stb_p(saddr((p)), v)
+#define stwr_raw(p, v) stwr_p(saddr((p)), v)
+#define stlr_raw(p, v) stlr_p(saddr((p)), v)
+#define stqr_raw(p, v) stqr_p(saddr((p)), v)
+#define stflr_raw(p, v) stflr_p(saddr((p)), v)
+#define stfqr_raw(p, v) stfqr_p(saddr((p)), v)
+/* big-endian accesses */
+#define SUFFIX _raw
+#define ESUFFIX _be
+#define ADDR_TYPE void *
+#define LADDR_TYPE (void *)
+#ifdef TARGET_WORDS_BIGENDIAN
+#define LSUFFIX _raw
+#else
+#define LSUFFIX r_raw
+#endif
+#include "nommu_template.h"
+#undef ADDR_TYPE
+#undef SUFFIX
+/* little-endian accesses */
+#define SUFFIX _raw
+#define ESUFFIX _le
+#define ADDR_TYPE void *
+#define LADDR_TYPE (void *)
+#ifdef TARGET_WORDS_BIGENDIAN
+#define LSUFFIX r_raw
+#else
+#define LSUFFIX _raw
+#endif
+#include "nommu_template.h"
+#undef ADDR_TYPE
+#undef SUFFIX
 
 #if defined(CONFIG_USER_ONLY)
 
-/* if user mode, no other memory access functions */
-#define ldub(p) ldub_raw(p)
-#define ldsb(p) ldsb_raw(p)
-#define lduw(p) lduw_raw(p)
-#define ldsw(p) ldsw_raw(p)
-#define ldl(p) ldl_raw(p)
-#define ldq(p) ldq_raw(p)
-#define ldfl(p) ldfl_raw(p)
-#define ldfq(p) ldfq_raw(p)
-#define stb(p, v) stb_raw(p, v)
-#define stw(p, v) stw_raw(p, v)
-#define stl(p, v) stl_raw(p, v)
-#define stq(p, v) stq_raw(p, v)
-#define stfl(p, v) stfl_raw(p, v)
-#define stfq(p, v) stfq_raw(p, v)
-
-#define ldub_code(p) ldub_raw(p)
-#define ldsb_code(p) ldsb_raw(p)
-#define lduw_code(p) lduw_raw(p)
-#define ldsw_code(p) ldsw_raw(p)
-#define ldl_code(p) ldl_raw(p)
-#define ldq_code(p) ldq_raw(p)
-
-#define ldub_kernel(p) ldub_raw(p)
-#define ldsb_kernel(p) ldsb_raw(p)
-#define lduw_kernel(p) lduw_raw(p)
-#define ldsw_kernel(p) ldsw_raw(p)
-#define ldl_kernel(p) ldl_raw(p)
-#define ldq_kernel(p) ldq_raw(p)
-#define ldfl_kernel(p) ldfl_raw(p)
-#define ldfq_kernel(p) ldfq_raw(p)
-#define stb_kernel(p, v) stb_raw(p, v)
-#define stw_kernel(p, v) stw_raw(p, v)
-#define stl_kernel(p, v) stl_raw(p, v)
-#define stq_kernel(p, v) stq_raw(p, v)
-#define stfl_kernel(p, v) stfl_raw(p, v)
-#define stfq_kernel(p, vt) stfq_raw(p, v)
+#define SUFFIX
+#define ADDR_TYPE unsigned long
+#include "nommu_header.h"
+#define SUFFIX _code
+#define ADDR_TYPE unsigned long
+#include "nommu_header.h"
+#define SUFFIX MMU_MODE0_SUFFIX
+#define ADDR_TYPE unsigned long
+#include "nommu_header.h"
+#define SUFFIX MMU_MODE1_SUFFIX
+#define ADDR_TYPE unsigned long
+#include "nommu_header.h"
+#if (NB_MMU_MODES >= 3)
+#define SUFFIX MMU_MODE2_SUFFIX
+#define ADDR_TYPE unsigned long
+#include "nommu_header.h"
+#if (NB_MMU_MODES >= 4)
+#define SUFFIX MMU_MODE3_SUFFIX
+#define ADDR_TYPE unsigned long
+#include "nommu_header.h"
+#if (NB_MMU_MODES > 4)
+#error "NB_MMU_MODES > 4 is not supported for now"
+#endif /* (NB_MMU_MODES > 4) */
+#endif /* (NB_MMU_MODES >= 4) */
+#endif /* (NB_MMU_MODES >= 3) */
 
 #endif /* defined(CONFIG_USER_ONLY) */
 
@@ -790,6 +921,8 @@ extern uint8_t *phys_ram_dirty;
    the physical address */
 #define IO_MEM_ROMD        (1)
 #define IO_MEM_SUBPAGE     (2)
+/* On some target CPUs, endianness is stored in page tables */
+#define IO_MEM_REVERSE     (4)
 
 typedef void CPUWriteMemoryFunc(void *opaque, target_phys_addr_t addr, uint32_t value);
 typedef uint32_t CPUReadMemoryFunc(void *opaque, target_phys_addr_t addr);
@@ -821,8 +954,8 @@ static inline void cpu_physical_memory_w
 }
 uint32_t ldub_phys(target_phys_addr_t addr);
 uint32_t lduw_phys(target_phys_addr_t addr);
-uint32_t ldl_phys(target_phys_addr_t addr);
-uint64_t ldq_phys(target_phys_addr_t addr);
+uint32_t ldul_phys(target_phys_addr_t addr);
+uint64_t lduq_phys(target_phys_addr_t addr);
 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val);
 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val);
 void stb_phys(target_phys_addr_t addr, uint32_t val);
Index: cpu-exec.c
===================================================================
RCS file: /sources/qemu/qemu/cpu-exec.c,v
retrieving revision 1.120
diff -u -d -d -p -r1.120 cpu-exec.c
--- cpu-exec.c	14 Oct 2007 07:07:04 -0000	1.120
+++ cpu-exec.c	16 Oct 2007 11:39:03 -0000
@@ -436,12 +436,12 @@ int cpu_exec(CPUState *env1)
                          /* FIXME: this should respect TPR */
                          env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
                          svm_check_intercept(SVM_EXIT_VINTR);
-                         intno = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector));
+                         intno = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector));
                          if (loglevel & CPU_LOG_TB_IN_ASM)
                              fprintf(logfile, "Servicing virtual hardware INT=0x%02x\n", intno);
 	                 do_interrupt(intno, 0, 0, -1, 1);
                          stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl),
-                                  ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)) & ~V_IRQ_MASK);
+                                  ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)) & ~V_IRQ_MASK);
 #if defined(__sparc__) && !defined(HOST_SOLARIS)
                          tmp_T0 = 0;
 #else
Index: exec-all.h
===================================================================
RCS file: /sources/qemu/qemu/exec-all.h,v
retrieving revision 1.68
diff -u -d -d -p -r1.68 exec-all.h
--- exec-all.h	14 Oct 2007 07:07:04 -0000	1.68
+++ exec-all.h	16 Oct 2007 11:39:03 -0000
@@ -569,6 +569,21 @@ void tlb_fill(target_ulong addr, int is_
 #define MEMSUFFIX _code
 #define env cpu_single_env
 
+/* native-endian */
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+
+/* reverse-endian */
+#define REVERSE_ENDIAN
 #define DATA_SIZE 1
 #include "softmmu_header.h"
 
@@ -580,6 +595,7 @@ void tlb_fill(target_ulong addr, int is_
 
 #define DATA_SIZE 8
 #include "softmmu_header.h"
+#undef REVERSE_ENDIAN
 
 #undef ACCESS_TYPE
 #undef MEMSUFFIX
Index: exec.c
===================================================================
RCS file: /sources/qemu/qemu/exec.c,v
retrieving revision 1.109
diff -u -d -d -p -r1.109 exec.c
--- exec.c	14 Oct 2007 07:07:04 -0000	1.109
+++ exec.c	16 Oct 2007 11:39:03 -0000
@@ -2202,7 +2202,7 @@ static uint32_t watch_mem_readw(void *op
 
 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
 {
-    return ldl_phys(addr);
+    return ldul_phys(addr);
 }
 
 /* Generate a debug exception if a watchpoint has been hit.
@@ -2507,7 +2507,7 @@ void cpu_physical_memory_rw(target_phys_
     uint8_t *ptr;
     uint32_t val;
     target_phys_addr_t page;
-    unsigned long pd;
+    unsigned long pd, addr1;
     PhysPageDesc *p;
 
     while (len > 0) {
@@ -2524,31 +2524,54 @@ void cpu_physical_memory_rw(target_phys_
 
         if (is_write) {
             if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
-                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
-                /* XXX: could force cpu_single_env to NULL to avoid
-                   potential bugs */
-                if (l >= 4 && ((addr & 3) == 0)) {
-                    /* 32 bit write access */
-                    val = ldl_p(buf);
-                    io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
-                    l = 4;
-                } else if (l >= 2 && ((addr & 1) == 0)) {
-                    /* 16 bit write access */
-                    val = lduw_p(buf);
-                    io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
-                    l = 2;
+                if (pd & IO_MEM_REVERSE) {
+                    /* Specific case for reverse endian page write */
+                    addr1 = (pd & TARGET_PAGE_MASK) +
+                        (addr & ~TARGET_PAGE_MASK);
+                    ptr = phys_ram_base + addr1;
+                    for (; l >= 4; l -= 4) {
+                        stlr_p(ptr, *(uint32_t *)buf);
+                        ptr += 4;
+                        buf += 4;
+                    }
+                    for (; l >= 2; l -= 2) {
+                        stwr_p(ptr, *(uint16_t *)buf);
+                        ptr += 2;
+                        buf += 2;
+                    }
+                    if (l >= 1)
+                        *ptr = *buf;
+                    goto invalidate_code;
                 } else {
-                    /* 8 bit write access */
-                    val = ldub_p(buf);
-                    io_mem_write[io_index][0](io_mem_opaque[io_index], addr, val);
-                    l = 1;
+                    io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+                    /* XXX: could force cpu_single_env to NULL to avoid
+                       potential bugs */
+                    if (l >= 4 && ((addr & 3) == 0)) {
+                        /* 32 bit write access */
+                        val = ldul_p(buf);
+                        io_mem_write[io_index][2](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 4;
+                    } else if (l >= 2 && ((addr & 1) == 0)) {
+                        /* 16 bit write access */
+                        val = lduw_p(buf);
+                        io_mem_write[io_index][1](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 2;
+                    } else {
+                        /* 8 bit write access */
+                        val = ldub_p(buf);
+                        io_mem_write[io_index][0](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 1;
+                    }
                 }
             } else {
-                unsigned long addr1;
                 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
                 /* RAM case */
                 ptr = phys_ram_base + addr1;
                 memcpy(ptr, buf, l);
+            invalidate_code:
                 if (!cpu_physical_memory_is_dirty(addr1)) {
                     /* invalidate code */
                     tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
@@ -2560,23 +2583,45 @@ void cpu_physical_memory_rw(target_phys_
         } else {
             if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
                 !(pd & IO_MEM_ROMD)) {
-                /* I/O case */
-                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
-                if (l >= 4 && ((addr & 3) == 0)) {
-                    /* 32 bit read access */
-                    val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
-                    stl_p(buf, val);
-                    l = 4;
-                } else if (l >= 2 && ((addr & 1) == 0)) {
-                    /* 16 bit read access */
-                    val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
-                    stw_p(buf, val);
-                    l = 2;
+                if (pd & IO_MEM_REVERSE) {
+                    /* Specific case for reverse endian page read */
+                    addr1 = (pd & TARGET_PAGE_MASK) +
+                        (addr & ~TARGET_PAGE_MASK);
+                    ptr = phys_ram_base + addr1;
+                    for (; l >= 4; l -= 4) {
+                        *(uint32_t *)buf = ldulr_p(ptr);
+                        ptr += 4;
+                        buf += 4;
+                    }
+                    for (; l >= 2; l -= 2) {
+                        *(uint16_t *)buf = lduwr_p(ptr);
+                        ptr += 2;
+                        buf += 2;
+                    }
+                    if (l >= 1)
+                        *buf = *ptr;
                 } else {
-                    /* 8 bit read access */
-                    val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr);
-                    stb_p(buf, val);
-                    l = 1;
+                    /* I/O case */
+                    io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+                    if (l >= 4 && ((addr & 3) == 0)) {
+                        /* 32 bit read access */
+                        val = io_mem_read[io_index][2](io_mem_opaque[io_index],
+                                                       addr);
+                        stl_p(buf, val);
+                        l = 4;
+                    } else if (l >= 2 && ((addr & 1) == 0)) {
+                        /* 16 bit read access */
+                        val = io_mem_read[io_index][1](io_mem_opaque[io_index],
+                                                       addr);
+                        stw_p(buf, val);
+                        l = 2;
+                    } else {
+                        /* 8 bit read access */
+                        val = io_mem_read[io_index][0](io_mem_opaque[io_index],
+                                                       addr);
+                        stb_p(buf, val);
+                        l = 1;
+                    }
                 }
             } else {
                 /* RAM case */
@@ -2632,7 +2677,7 @@ void cpu_physical_memory_write_rom(targe
 
 
 /* warning: addr must be aligned */
-uint32_t ldl_phys(target_phys_addr_t addr)
+uint32_t ldul_phys(target_phys_addr_t addr)
 {
     int io_index;
     uint8_t *ptr;
@@ -2656,13 +2701,13 @@ uint32_t ldl_phys(target_phys_addr_t add
         /* RAM case */
         ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) +
             (addr & ~TARGET_PAGE_MASK);
-        val = ldl_p(ptr);
+        val = ldul_p(ptr);
     }
     return val;
 }
 
 /* warning: addr must be aligned */
-uint64_t ldq_phys(target_phys_addr_t addr)
+uint64_t lduq_phys(target_phys_addr_t addr)
 {
     int io_index;
     uint8_t *ptr;
@@ -2692,7 +2737,7 @@ uint64_t ldq_phys(target_phys_addr_t add
         /* RAM case */
         ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) +
             (addr & ~TARGET_PAGE_MASK);
-        val = ldq_p(ptr);
+        val = lduq_p(ptr);
     }
     return val;
 }
@@ -2907,6 +2952,7 @@ void dump_exec_info(FILE *f,
 #define env cpu_single_env
 #define SOFTMMU_CODE_ACCESS
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -2919,6 +2965,21 @@ void dump_exec_info(FILE *f,
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 #undef env
 
 #endif
Index: monitor.c
===================================================================
RCS file: /sources/qemu/qemu/monitor.c,v
retrieving revision 1.83
diff -u -d -d -p -r1.83 monitor.c
--- monitor.c	25 Sep 2007 17:28:42 -0000	1.83
+++ monitor.c	16 Oct 2007 11:39:03 -0000
@@ -595,10 +595,10 @@ static void memory_dump(int count, int f
                 v = lduw_raw(buf + i);
                 break;
             case 4:
-                v = (uint32_t)ldl_raw(buf + i);
+                v = (uint32_t)ldul_raw(buf + i);
                 break;
             case 8:
-                v = ldq_raw(buf + i);
+                v = lduq_raw(buf + i);
                 break;
             }
             term_printf(" ");
Index: nommu_header.h
===================================================================
RCS file: nommu_header.h
diff -N nommu_header.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ nommu_header.h	16 Oct 2007 11:39:03 -0000
@@ -0,0 +1,51 @@
+/*
+ *  No MMU support definitions
+ *
+ *  Copyright (c) 2003-2007 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/* Native-endian accesses */
+#define ESUFFIX
+#define LSUFFIX _raw
+#define LADDR_TYPE (void *)
+#include "nommu_template.h"
+/* Reverse-endian accesses */
+#define ESUFFIX r
+#define LSUFFIX r_raw
+#define LADDR_TYPE (void *)
+#include "nommu_template.h"
+/* big-endian accesses */
+#define ESUFFIX _be
+#define LADDR_TYPE (unsigned long)
+#ifdef TARGET_WORDS_BIGENDIAN
+#define LSUFFIX
+#else
+#define LSUFFIX r
+#endif
+#include "nommu_template.h"
+/* little-endian accesses */
+#define ESUFFIX _le
+#define LADDR_TYPE (unsigned long)
+#ifdef TARGET_WORDS_BIGENDIAN
+#define LSUFFIX r
+#else
+#define LSUFFIX
+#endif
+#include "nommu_template.h"
+
+#undef ADDR_TYPE
+#undef SUFFIX
Index: nommu_template.h
===================================================================
RCS file: nommu_template.h
diff -N nommu_template.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ nommu_template.h	16 Oct 2007 11:39:03 -0000
@@ -0,0 +1,109 @@
+/*
+ *  No MMU support helpers
+ *
+ *  Copyright (c) 2003-2007 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/* XXX: fix this */
+#ifndef glue
+#define xglue(x, y) x ## y
+#define glue(x, y) xglue(x, y)
+#endif
+
+static inline int glue(glue(ldub, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
+{
+    return glue(ldub, LSUFFIX)(LADDR_TYPE ptr);
+}
+
+static inline int glue(glue(ldsb, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
+{
+    return glue(ldsb, LSUFFIX)(LADDR_TYPE ptr);
+}
+
+static inline int glue(glue(lduw, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
+{
+    return glue(lduw, LSUFFIX)(LADDR_TYPE ptr);
+}
+
+static inline int glue(glue(ldsw, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
+{
+    return glue(ldsw, LSUFFIX)(LADDR_TYPE ptr);
+}
+
+static inline target_long glue(glue(ldul, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
+{
+    return glue(ldul, LSUFFIX)(LADDR_TYPE ptr);
+}
+
+static inline target_long glue(glue(ldsl, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
+{
+    return glue(ldsl, LSUFFIX)(LADDR_TYPE ptr);
+}
+
+static inline uint64_t glue(glue(lduq, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
+{
+    return glue(lduq, LSUFFIX)(LADDR_TYPE ptr);
+}
+
+static inline int64_t glue(glue(ldsq, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
+{
+    return glue(ldsq, LSUFFIX)(LADDR_TYPE ptr);
+}
+
+static inline float32 glue(glue(ldfl, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
+{
+    return glue(ldfl, LSUFFIX)(LADDR_TYPE ptr);
+}
+
+static inline float64 glue(glue(ldfq, ESUFFIX), SUFFIX) (ADDR_TYPE ptr)
+{
+    return glue(ldfq, LSUFFIX)(LADDR_TYPE ptr);
+}
+
+static inline void glue(glue(stb, ESUFFIX), SUFFIX) (ADDR_TYPE ptr, int v)
+{
+    glue(stb, LSUFFIX)(LADDR_TYPE ptr, v);
+}
+
+static inline void glue(glue(stw, ESUFFIX), SUFFIX) (ADDR_TYPE ptr, int v)
+{
+    glue(stw, LSUFFIX)(LADDR_TYPE ptr, v);
+}
+
+static inline void glue(glue(stl, ESUFFIX), SUFFIX) (ADDR_TYPE ptr, int v)
+{
+    glue(stl, LSUFFIX)(LADDR_TYPE ptr, v);
+}
+
+static inline void glue(glue(stq, ESUFFIX), SUFFIX) (ADDR_TYPE ptr, uint64_t v)
+{
+    glue(stq, LSUFFIX)(LADDR_TYPE ptr, v);
+}
+
+static inline void glue(glue(stfl, ESUFFIX), SUFFIX) (ADDR_TYPE ptr, float32 v)
+{
+    glue(stfl, LSUFFIX)(LADDR_TYPE ptr, v);
+}
+
+static inline void glue(glue(stfq, ESUFFIX), SUFFIX) (ADDR_TYPE ptr, float64 v)
+{
+    glue(stfq, LSUFFIX)(LADDR_TYPE ptr, v);
+}
+
+#undef LSUFFIX
+#undef LADDR_TYPE
+#undef ESUFFIX
Index: softmmu_exec.h
===================================================================
RCS file: /sources/qemu/qemu/softmmu_exec.h,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 softmmu_exec.h
--- softmmu_exec.h	14 Oct 2007 07:07:05 -0000	1.2
+++ softmmu_exec.h	16 Oct 2007 11:39:03 -0000
@@ -1,14 +1,5 @@
 /* Common softmmu definitions and inline routines.  */
 
-/* XXX: find something cleaner.
- * Furthermore, this is false for 64 bits targets
- */
-#define ldul_user       ldl_user
-#define ldul_kernel     ldl_kernel
-#define ldul_hypv       ldl_hypv
-#define ldul_executive  ldl_executive
-#define ldul_supervisor ldl_supervisor
-
 #define ACCESS_TYPE 0
 #define MEMSUFFIX MMU_MODE0_SUFFIX
 #define DATA_SIZE 1
@@ -104,8 +95,10 @@
 #define ldsb(p) ldsb_data(p)
 #define lduw(p) lduw_data(p)
 #define ldsw(p) ldsw_data(p)
-#define ldl(p) ldl_data(p)
-#define ldq(p) ldq_data(p)
+#define ldul(p) ldul_data(p)
+#define ldsl(p) ldsl_data(p)
+#define lduq(p) lduq_data(p)
+#define ldsq(p) ldsq_data(p)
 
 #define stb(p, v) stb_data(p, v)
 #define stw(p, v) stw_data(p, v)
Index: softmmu_header.h
===================================================================
RCS file: /sources/qemu/qemu/softmmu_header.h,v
retrieving revision 1.18
diff -u -d -d -p -r1.18 softmmu_header.h
--- softmmu_header.h	14 Oct 2007 07:07:05 -0000	1.18
+++ softmmu_header.h	16 Oct 2007 11:39:03 -0000
@@ -17,27 +17,84 @@
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
+
+#if !defined(REVERSE_ENDIAN)
+/* native-endian */
+#if defined(TARGET_WORDS_BIGENDIAN)
+#define ESUFFIX _be
+#else
+#define ESUFFIX _le
+#endif
 #if DATA_SIZE == 8
 #define SUFFIX q
-#define USUFFIX q
+#define USUFFIX uq
+#define LSUFFIX q
+#define LUSUFFIX uq
+#define DATA_STYPE int64_t
 #define DATA_TYPE uint64_t
 #elif DATA_SIZE == 4
 #define SUFFIX l
-#define USUFFIX l
+#define USUFFIX ul
+#define LSUFFIX l
+#define LUSUFFIX ul
+#define DATA_STYPE int32_t
 #define DATA_TYPE uint32_t
 #elif DATA_SIZE == 2
 #define SUFFIX w
 #define USUFFIX uw
+#define LSUFFIX w
+#define LUSUFFIX uw
 #define DATA_TYPE uint16_t
 #define DATA_STYPE int16_t
 #elif DATA_SIZE == 1
 #define SUFFIX b
 #define USUFFIX ub
+#define LSUFFIX b
+#define LUSUFFIX ub
+#define DATA_TYPE uint8_t
+#define DATA_STYPE int8_t
+#else
+#error unsupported data size
+#endif
+#else /* defined(REVERSE_ENDIAN) */
+/* reverse-endian */
+#if defined(TARGET_WORDS_BIGENDIAN)
+#define ESUFFIX _le
+#else
+#define ESUFFIX _be
+#endif
+#if DATA_SIZE == 8
+#define SUFFIX qr
+#define USUFFIX uqr
+#define LSUFFIX q
+#define LUSUFFIX uq
+#define DATA_STYPE int64_t
+#define DATA_TYPE uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX lr
+#define USUFFIX ulr
+#define LSUFFIX l
+#define LUSUFFIX ul
+#define DATA_STYPE int32_t
+#define DATA_TYPE uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX wr
+#define USUFFIX uwr
+#define LSUFFIX w
+#define LUSUFFIX uw
+#define DATA_TYPE uint16_t
+#define DATA_STYPE int16_t
+#elif DATA_SIZE == 1
+#define SUFFIX br
+#define USUFFIX ubr
+#define LSUFFIX b
+#define LUSUFFIX ub
 #define DATA_TYPE uint8_t
 #define DATA_STYPE int8_t
 #else
 #error unsupported data size
 #endif
+#endif /* defined(REVERSE_ENDIAN) */
 
 #if ACCESS_TYPE < (NB_MMU_MODES)
 
@@ -121,7 +178,6 @@ static inline RES_TYPE glue(glue(ld, USU
     return res;
 }
 
-#if DATA_SIZE <= 2
 static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
 {
     int res;
@@ -167,7 +223,6 @@ static inline int glue(glue(lds, SUFFIX)
                   : "%eax", "%ecx", "%edx", "memory", "cc");
     return res;
 }
-#endif
 
 static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v)
 {
@@ -244,7 +299,6 @@ static inline RES_TYPE glue(glue(ld, USU
     return res;
 }
 
-#if DATA_SIZE <= 2
 static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
 {
     int res, index;
@@ -264,7 +318,6 @@ static inline int glue(glue(lds, SUFFIX)
     }
     return res;
 }
-#endif
 
 #if ACCESS_TYPE != (NB_MMU_MODES + 1)
 
@@ -293,8 +346,27 @@ static inline void glue(glue(st, SUFFIX)
 
 #endif /* !asm */
 
+/* BE/LE access routines */
+static inline RES_TYPE glue(glue(glue(ld, LUSUFFIX), ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(glue(ld, USUFFIX), MEMSUFFIX)(ptr);
+}
+
+static inline RES_TYPE glue(glue(glue(lds, LSUFFIX), ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(glue(lds, SUFFIX), MEMSUFFIX)(ptr);
+}
+
 #if ACCESS_TYPE != (NB_MMU_MODES + 1)
+static inline void glue(glue(glue(st, LSUFFIX), ESUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v)
+{
+    glue(glue(st, SUFFIX), MEMSUFFIX)(ptr, v);
+}
+#endif
 
+#if ACCESS_TYPE != (NB_MMU_MODES + 1)
+
+#if !defined(REVERSE_ENDIAN)
 #if DATA_SIZE == 8
 static inline float64 glue(ldfq, MEMSUFFIX)(target_ulong ptr)
 {
@@ -302,10 +374,15 @@ static inline float64 glue(ldfq, MEMSUFF
         float64 d;
         uint64_t i;
     } u;
-    u.i = glue(ldq, MEMSUFFIX)(ptr);
+    u.i = glue(lduq, MEMSUFFIX)(ptr);
     return u.d;
 }
 
+static inline float64 glue(glue(ldfq, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldfq, MEMSUFFIX)(ptr);
+}
+
 static inline void glue(stfq, MEMSUFFIX)(target_ulong ptr, float64 v)
 {
     union {
@@ -315,6 +392,12 @@ static inline void glue(stfq, MEMSUFFIX)
     u.d = v;
     glue(stq, MEMSUFFIX)(ptr, u.i);
 }
+
+static inline void glue(glue(stfq, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                        float64 v)
+{
+    glue(stfq, MEMSUFFIX)(ptr, v);
+}
 #endif /* DATA_SIZE == 8 */
 
 #if DATA_SIZE == 4
@@ -324,10 +407,15 @@ static inline float32 glue(ldfl, MEMSUFF
         float32 f;
         uint32_t i;
     } u;
-    u.i = glue(ldl, MEMSUFFIX)(ptr);
+    u.i = glue(ldul, MEMSUFFIX)(ptr);
     return u.f;
 }
 
+static inline float32 glue(glue(ldfl, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldfl, MEMSUFFIX)(ptr);
+}
+
 static inline void glue(stfl, MEMSUFFIX)(target_ulong ptr, float32 v)
 {
     union {
@@ -337,8 +425,84 @@ static inline void glue(stfl, MEMSUFFIX)
     u.f = v;
     glue(stl, MEMSUFFIX)(ptr, u.i);
 }
+
+static inline void glue(glue(stfl, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                        float32 v)
+{
+    glue(stfl, MEMSUFFIX)(ptr, v);
+}
 #endif /* DATA_SIZE == 4 */
 
+#else /* defined(REVERSE_ENDIAN) */
+
+#if DATA_SIZE == 8
+static inline float64 glue(ldfqr, MEMSUFFIX)(target_ulong ptr)
+{
+    union {
+        float64 d;
+        uint64_t i;
+    } u;
+    u.i = glue(lduqr, MEMSUFFIX)(ptr);
+    return u.d;
+}
+
+static inline float64 glue(glue(ldfqr, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldfqr, MEMSUFFIX)(ptr);
+}
+
+static inline void glue(stfqr, MEMSUFFIX)(target_ulong ptr, float64 v)
+{
+    union {
+        float64 d;
+        uint64_t i;
+    } u;
+    u.d = v;
+    glue(stqr, MEMSUFFIX)(ptr, u.i);
+}
+
+static inline void glue(glue(stfqr, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                         float64 v)
+{
+    glue(stfqr, MEMSUFFIX)(ptr, v);
+}
+#endif /* DATA_SIZE == 8 */
+
+#if DATA_SIZE == 4
+static inline float32 glue(ldflr, MEMSUFFIX)(target_ulong ptr)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.i = glue(ldulr, MEMSUFFIX)(ptr);
+    return u.f;
+}
+
+static inline float32 glue(glue(ldflr, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldflr, MEMSUFFIX)(ptr);
+}
+
+static inline void glue(stflr, MEMSUFFIX)(target_ulong ptr, float32 v)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.f = v;
+    glue(stlr, MEMSUFFIX)(ptr, u.i);
+}
+
+static inline void glue(glue(stflr, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                         float32 v)
+{
+    glue(stflr, MEMSUFFIX)(ptr, v);
+}
+#endif /* DATA_SIZE == 4 */
+
+#endif /* defined(REVERSE_ENDIAN) */
+
 #endif /* ACCESS_TYPE != (NB_MMU_MODES + 1) */
 
 #undef RES_TYPE
@@ -346,7 +510,10 @@ static inline void glue(stfl, MEMSUFFIX)
 #undef DATA_STYPE
 #undef SUFFIX
 #undef USUFFIX
+#undef LSUFFIX
+#undef LUSUFFIX
 #undef DATA_SIZE
 #undef CPU_MMU_INDEX
 #undef MMUSUFFIX
+#undef ESUFFIX
 #undef ADDR_READ
Index: softmmu_template.h
===================================================================
RCS file: /sources/qemu/qemu/softmmu_template.h,v
retrieving revision 1.19
diff -u -d -d -p -r1.19 softmmu_template.h
--- softmmu_template.h	14 Oct 2007 07:07:05 -0000	1.19
+++ softmmu_template.h	16 Oct 2007 11:39:03 -0000
@@ -19,25 +19,66 @@
  */
 #define DATA_SIZE (1 << SHIFT)
 
+#if !defined(REVERSE_ENDIAN)
+/* native-endian */
 #if DATA_SIZE == 8
 #define SUFFIX q
-#define USUFFIX q
+#define USUFFIX uq
+#define RSUFFIX qr
+#define URSUFFIX uqr
 #define DATA_TYPE uint64_t
 #elif DATA_SIZE == 4
 #define SUFFIX l
-#define USUFFIX l
+#define USUFFIX ul
+#define RSUFFIX lr
+#define URSUFFIX ulr
 #define DATA_TYPE uint32_t
 #elif DATA_SIZE == 2
 #define SUFFIX w
 #define USUFFIX uw
+#define RSUFFIX wr
+#define URSUFFIX uwr
 #define DATA_TYPE uint16_t
 #elif DATA_SIZE == 1
 #define SUFFIX b
 #define USUFFIX ub
+#define RSUFFIX br
+#define URSUFFIX ubr
 #define DATA_TYPE uint8_t
 #else
 #error unsupported data size
 #endif
+#else /* defined(REVERSE_ENDIAN) */
+/* reverse-endian */
+#if DATA_SIZE == 8
+#define SUFFIX qr
+#define USUFFIX uqr
+#define RSUFFIX q
+#define URSUFFIX uq
+#define DATA_TYPE uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX lr
+#define USUFFIX ulr
+#define RSUFFIX l
+#define URSUFFIX ul
+#define DATA_TYPE uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX wr
+#define USUFFIX uwr
+#define RSUFFIX w
+#define URSUFFIX uw
+#define DATA_TYPE uint16_t
+#elif DATA_SIZE == 1
+#define SUFFIX br
+#define USUFFIX ubr
+#define RSUFFIX b
+#define URSUFFIX ub
+#define DATA_TYPE uint8_t
+#else
+#error unsupported data size
+#endif
+#endif /* defined(REVERSE_ENDIAN) */
+
 
 #ifdef SOFTMMU_CODE_ACCESS
 #define READ_ACCESS_TYPE 2
@@ -47,25 +88,62 @@
 #define ADDR_READ addr_read
 #endif
 
+#if (defined(TARGET_WORDS_BIGENDIAN) && !defined(REVERSE_ENDIAN)) || \
+    (!defined(TARGET_WORDS_BIGENDIAN) && defined(REVERSE_ENDIAN))
+#define ACCESS_WORDS_BIGENDIAN
+#endif
+
+/* Beware: we do not have reverse-endian accessors for IOs */
+#if defined(REVERSE_ENDIAN)
+#define DO_IOSWAP 1
+#else
+#define DO_IOSWAP 0
+#endif
+#if SHIFT == 1
+#define IOSWAP(val) bswap16(val)
+#elif SHIFT >= 2
+#define IOSWAP(val) bswap32(val)
+#else
+#define IOSWAP(val) (val)
+#endif
+
 static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
                                                         int mmu_idx,
                                                         void *retaddr);
 static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
-                                              target_ulong tlb_addr)
+                                              target_ulong tlb_addr,
+                                              int do_ioswap)
 {
     DATA_TYPE res;
+#if SHIFT > 2
+    uint32_t tmp;
+#endif
     int index;
 
     index = (tlb_addr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
 #if SHIFT <= 2
     res = io_mem_read[index][SHIFT](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        res = IOSWAP(res);
 #else
-#ifdef TARGET_WORDS_BIGENDIAN
-    res = (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr) << 32;
-    res |= io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+#ifdef ACCESS_WORDS_BIGENDIAN
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res = (uint64_t)tmp << 32;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res |= tmp;
 #else
-    res = io_mem_read[index][2](io_mem_opaque[index], physaddr);
-    res |= (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr + 4) << 32;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res = tmp;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res |= (uint64_t)tmp << 32;
 #endif
 #endif /* SHIFT > 2 */
 #ifdef USE_KQEMU
@@ -92,10 +170,34 @@ DATA_TYPE REGPARM(1) glue(glue(__ld, SUF
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[mmu_idx][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO read */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 1);
+                } else {
+                    /* Specific case for reverse endian page read */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages) */
+                        goto do_unaligned_access;
+                    }
+#ifdef ALIGNED_ONLY
+                    if ((addr & (DATA_SIZE - 1)) != 0) {
+                        retaddr = GETPC();
+                        do_unaligned_access(addr, READ_ACCESS_TYPE,
+                                            mmu_idx, retaddr);
+                    }
+#endif
+                    res = glue(glue(ld, URSUFFIX), _raw)((uint8_t *)(long)physaddr);
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
             /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
@@ -144,10 +246,45 @@ static DATA_TYPE glue(glue(slow_ld, SUFF
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[mmu_idx][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO read */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 1);
+                } else {
+                    /* Specific case for reverse endian page read */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages) */
+                        addr1 = addr & ~(DATA_SIZE - 1);
+                        addr2 = addr1 + DATA_SIZE;
+                        res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr1,
+                                                                      mmu_idx,
+                                                                      retaddr);
+                        res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
+                                                                      mmu_idx,
+                                                                      retaddr);
+                        shift = (addr & (DATA_SIZE - 1)) * 8;
+#ifdef ACCESS_WORDS_BIGENDIAN
+                        res = (res1 >> shift) |
+                            (res2 << ((DATA_SIZE * 8) - shift));
+#else
+                        res = (res1 << shift) |
+                            (res2 >> ((DATA_SIZE * 8) - shift));
+#endif
+                        res = (DATA_TYPE)res;
+                    } else {
+                        /* unaligned/aligned access in the same page */
+                        res = glue(glue(ld, URSUFFIX), _raw)((uint8_t *)(long)physaddr);
+                    }
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* slow unaligned access (it spans two pages) */
@@ -158,7 +295,7 @@ static DATA_TYPE glue(glue(slow_ld, SUFF
             res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
                                                           mmu_idx, retaddr);
             shift = (addr & (DATA_SIZE - 1)) * 8;
-#ifdef TARGET_WORDS_BIGENDIAN
+#ifdef ACCESS_WORDS_BIGENDIAN
             res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
 #else
             res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift));
@@ -186,22 +323,39 @@ static void glue(glue(slow_st, SUFFIX), 
 static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr,
                                           DATA_TYPE val,
                                           target_ulong tlb_addr,
-                                          void *retaddr)
+                                          void *retaddr, int do_ioswap)
 {
+#if SHIFT > 2
+    uint32_t tmp;
+#endif
     int index;
 
     index = (tlb_addr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
     env->mem_write_vaddr = tlb_addr;
     env->mem_write_pc = (unsigned long)retaddr;
 #if SHIFT <= 2
+    if (do_ioswap != DO_IOSWAP)
+        val = IOSWAP(val);
     io_mem_write[index][SHIFT](io_mem_opaque[index], physaddr, val);
 #else
-#ifdef TARGET_WORDS_BIGENDIAN
-    io_mem_write[index][2](io_mem_opaque[index], physaddr, val >> 32);
-    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val);
+#ifdef ACCESS_WORDS_BIGENDIAN
+    tmp = val >> 32;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr, tmp);
+    tmp = val;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, tmp);
 #else
-    io_mem_write[index][2](io_mem_opaque[index], physaddr, val);
-    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val >> 32);
+    tmp = val;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr, tmp);
+    tmp = val >> 32;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, tmp);
 #endif
 #endif /* SHIFT > 2 */
 #ifdef USE_KQEMU
@@ -224,12 +378,37 @@ void REGPARM(2) glue(glue(__st, SUFFIX),
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[mmu_idx][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            retaddr = GETPC();
-            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO write */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    retaddr = GETPC();
+                    glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr,
+                                           1);
+                } else {
+                    /* Specific case for reverse endian page write */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        goto do_unaligned_access;
+                    }
+#ifdef ALIGNED_ONLY
+                    if ((addr & (DATA_SIZE - 1)) != 0) {
+                        retaddr = GETPC();
+                        do_unaligned_access(addr, 1, mmu_idx, retaddr);
+                    }
+#endif
+                    glue(glue(st, RSUFFIX), _raw)((uint8_t *)(long)physaddr, val);
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                retaddr = GETPC();
+                glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
+            /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
@@ -275,15 +454,48 @@ static void glue(glue(slow_st, SUFFIX), 
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[mmu_idx][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO write */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr,
+                                           1);
+                } else {
+                    /* Specific case for reverse endian page write */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages) */
+                        /* XXX: not efficient, but simple */
+                        for(i = 0;i < DATA_SIZE; i++) {
+#ifdef ACCESS_WORDS_BIGENDIAN
+                            glue(slow_stb, MMUSUFFIX)(addr + i, val >> (i * 8),
+                                                      mmu_idx, retaddr);
+#else
+                            glue(slow_stb, MMUSUFFIX)(addr + i,
+                                                      val >> (((DATA_SIZE - 1) * 8)
+                                                              - (i * 8)),
+                                                      mmu_idx, retaddr);
+#endif
+                        }
+
+                    } else {
+                        /* aligned/unaligned access in the same page */
+                        glue(glue(st, RSUFFIX), _raw)((uint8_t *)(long)physaddr,
+                                                      val);
+                    }
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* XXX: not efficient, but simple */
             for(i = 0;i < DATA_SIZE; i++) {
-#ifdef TARGET_WORDS_BIGENDIAN
+#ifdef ACCESS_WORDS_BIGENDIAN
                 glue(slow_stb, MMUSUFFIX)(addr + i, val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
                                           mmu_idx, retaddr);
 #else
@@ -304,10 +516,15 @@ static void glue(glue(slow_st, SUFFIX), 
 
 #endif /* !defined(SOFTMMU_CODE_ACCESS) */
 
+#undef DO_IOSWAP
+#undef IOSWAP
+#undef ACCESS_WORDS_BIGENDIAN
 #undef READ_ACCESS_TYPE
 #undef SHIFT
 #undef DATA_TYPE
 #undef SUFFIX
 #undef USUFFIX
+#undef RSUFFIX
+#undef URSUFFIX
 #undef DATA_SIZE
 #undef ADDR_READ
Index: hw/eepro100.c
===================================================================
RCS file: /sources/qemu/qemu/hw/eepro100.c,v
retrieving revision 1.6
diff -u -d -d -p -r1.6 eepro100.c
--- hw/eepro100.c	16 Sep 2007 21:07:52 -0000	1.6
+++ hw/eepro100.c	16 Oct 2007 11:39:04 -0000
@@ -723,7 +723,7 @@ static void eepro100_cu_command(EEPRO100
             uint32_t tbd_address = cb_address + 0x10;
             assert(tcb_bytes <= sizeof(buf));
             while (size < tcb_bytes) {
-                uint32_t tx_buffer_address = ldl_phys(tbd_address);
+                uint32_t tx_buffer_address = ldul_phys(tbd_address);
                 uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
                 //~ uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
                 tbd_address += 8;
@@ -743,7 +743,7 @@ static void eepro100_cu_command(EEPRO100
                     /* Extended TCB. */
                     assert(tcb_bytes == 0);
                     for (; tbd_count < 2; tbd_count++) {
-                        uint32_t tx_buffer_address = ldl_phys(tbd_address);
+                        uint32_t tx_buffer_address = ldul_phys(tbd_address);
                         uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
                         uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
                         tbd_address += 8;
@@ -760,7 +760,7 @@ static void eepro100_cu_command(EEPRO100
                 }
                 tbd_address = tbd_array;
                 for (; tbd_count < tx.tbd_count; tbd_count++) {
-                    uint32_t tx_buffer_address = ldl_phys(tbd_address);
+                    uint32_t tx_buffer_address = ldul_phys(tbd_address);
                     uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
                     uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
                     tbd_address += 8;
Index: hw/pc.c
===================================================================
RCS file: /sources/qemu/qemu/hw/pc.c,v
retrieving revision 1.87
diff -u -d -d -p -r1.87 pc.c
--- hw/pc.c	9 Oct 2007 03:08:56 -0000	1.87
+++ hw/pc.c	16 Oct 2007 11:39:04 -0000
@@ -477,8 +477,8 @@ static void load_linux(const char *kerne
     }
 
     /* kernel protocol version */
-    fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202));
-    if (ldl_p(header+0x202) == 0x53726448)
+    fprintf(stderr, "header magic: %#x\n", ldul_p(header+0x202));
+    if (ldul_p(header+0x202) == 0x53726448)
 	protocol = lduw_p(header+0x206);
     else
 	protocol = 0;
@@ -510,7 +510,7 @@ static void load_linux(const char *kerne
 
     /* highest address for loading the initrd */
     if (protocol >= 0x203)
-	initrd_max = ldl_p(header+0x22c);
+	initrd_max = ldul_p(header+0x22c);
     else
 	initrd_max = 0x37ffffff;
 
Index: hw/pl080.c
===================================================================
RCS file: /sources/qemu/qemu/hw/pl080.c,v
retrieving revision 1.5
diff -u -d -d -p -r1.5 pl080.c
--- hw/pl080.c	16 Sep 2007 21:07:55 -0000	1.5
+++ hw/pl080.c	16 Oct 2007 11:39:05 -0000
@@ -162,10 +162,10 @@ again:
             if (size == 0) {
                 /* Transfer complete.  */
                 if (ch->lli) {
-                    ch->src = ldl_phys(ch->lli);
-                    ch->dest = ldl_phys(ch->lli + 4);
-                    ch->ctrl = ldl_phys(ch->lli + 12);
-                    ch->lli = ldl_phys(ch->lli + 8);
+                    ch->src = ldul_phys(ch->lli);
+                    ch->dest = ldul_phys(ch->lli + 4);
+                    ch->ctrl = ldul_phys(ch->lli + 12);
+                    ch->lli = ldul_phys(ch->lli + 8);
                 } else {
                     ch->conf &= ~PL080_CCONF_E;
                 }
Index: hw/sun4m.c
===================================================================
RCS file: /sources/qemu/qemu/hw/sun4m.c,v
retrieving revision 1.55
diff -u -d -d -p -r1.55 sun4m.c
--- hw/sun4m.c	6 Oct 2007 11:28:21 -0000	1.55
+++ hw/sun4m.c	16 Oct 2007 11:39:05 -0000
@@ -465,7 +465,7 @@ static void sun4m_load_kernel(long vram_
         }
         if (initrd_size > 0) {
             for (i = 0; i < 64 * TARGET_PAGE_SIZE; i += TARGET_PAGE_SIZE) {
-                if (ldl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
+                if (ldul_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
                     == 0x48647253) { // HdrS
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 16, INITRD_LOAD_ADDR);
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 20, initrd_size);
Index: hw/sun4u.c
===================================================================
RCS file: /sources/qemu/qemu/hw/sun4u.c,v
retrieving revision 1.22
diff -u -d -d -p -r1.22 sun4u.c
--- hw/sun4u.c	6 Oct 2007 11:28:21 -0000	1.22
+++ hw/sun4u.c	16 Oct 2007 11:39:05 -0000
@@ -418,7 +418,7 @@ static void sun4u_init(int ram_size, int
         }
         if (initrd_size > 0) {
             for (i = 0; i < 64 * TARGET_PAGE_SIZE; i += TARGET_PAGE_SIZE) {
-                if (ldl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
+                if (ldul_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
                     == 0x48647253) { // HdrS
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 16, INITRD_LOAD_ADDR);
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 20, initrd_size);
Index: linux-user/elfload.c
===================================================================
RCS file: /sources/qemu/qemu/linux-user/elfload.c,v
retrieving revision 1.51
diff -u -d -d -p -r1.51 elfload.c
--- linux-user/elfload.c	9 Oct 2007 16:34:29 -0000	1.51
+++ linux-user/elfload.c	16 Oct 2007 11:39:05 -0000
@@ -322,8 +322,8 @@ static inline void init_thread(struct ta
     _regs->msr = 1 << MSR_PR; /* Set user mode */
     _regs->gpr[1] = infop->start_stack;
 #ifdef TARGET_PPC64
-    entry = ldq_raw(infop->entry) + infop->load_addr;
-    toc = ldq_raw(infop->entry + 8) + infop->load_addr;
+    entry = lduq_raw(infop->entry) + infop->load_addr;
+    toc = lduq_raw(infop->entry + 8) + infop->load_addr;
     _regs->gpr[2] = toc;
     infop->entry = entry;
 #endif
@@ -336,7 +336,7 @@ static inline void init_thread(struct ta
     pos += sizeof(target_ulong);
     _regs->gpr[4] = pos;
     for (tmp = 1; tmp != 0; pos += sizeof(target_ulong))
-        tmp = ldl(pos);
+        tmp = ldul(pos);
     _regs->gpr[5] = pos;
 }
 
Index: linux-user/qemu.h
===================================================================
RCS file: /sources/qemu/qemu/linux-user/qemu.h,v
retrieving revision 1.40
diff -u -d -d -p -r1.40 qemu.h
--- linux-user/qemu.h	9 Oct 2007 16:34:29 -0000	1.40
+++ linux-user/qemu.h	16 Oct 2007 11:39:05 -0000
@@ -313,15 +313,15 @@ static inline void *lock_user_string(tar
 #define tput8(addr, val) stb(addr, val)
 #define tget16(addr) lduw(addr)
 #define tput16(addr, val) stw(addr, val)
-#define tget32(addr) ldl(addr)
+#define tget32(addr) ldul(addr)
 #define tput32(addr, val) stl(addr, val)
-#define tget64(addr) ldq(addr)
+#define tget64(addr) lduq(addr)
 #define tput64(addr, val) stq(addr, val)
 #if TARGET_LONG_BITS == 64
-#define tgetl(addr) ldq(addr)
+#define tgetl(addr) lduq(addr)
 #define tputl(addr, val) stq(addr, val)
 #else
-#define tgetl(addr) ldl(addr)
+#define tgetl(addr) ldul(addr)
 #define tputl(addr, val) stl(addr, val)
 #endif
 
Index: linux-user/signal.c
===================================================================
RCS file: /sources/qemu/qemu/linux-user/signal.c,v
retrieving revision 1.45
diff -u -d -d -p -r1.45 signal.c
--- linux-user/signal.c	5 Oct 2007 17:01:51 -0000	1.45
+++ linux-user/signal.c	16 Oct 2007 11:39:05 -0000
@@ -878,28 +878,28 @@ restore_sigcontext(CPUX86State *env, str
         cpu_x86_load_seg(env, R_ES, lduw(&sc->es));
         cpu_x86_load_seg(env, R_DS, lduw(&sc->ds));
 
-        env->regs[R_EDI] = ldl(&sc->edi);
-        env->regs[R_ESI] = ldl(&sc->esi);
-        env->regs[R_EBP] = ldl(&sc->ebp);
-        env->regs[R_ESP] = ldl(&sc->esp);
-        env->regs[R_EBX] = ldl(&sc->ebx);
-        env->regs[R_EDX] = ldl(&sc->edx);
-        env->regs[R_ECX] = ldl(&sc->ecx);
-        env->eip = ldl(&sc->eip);
+        env->regs[R_EDI] = ldul(&sc->edi);
+        env->regs[R_ESI] = ldul(&sc->esi);
+        env->regs[R_EBP] = ldul(&sc->ebp);
+        env->regs[R_ESP] = ldul(&sc->esp);
+        env->regs[R_EBX] = ldul(&sc->ebx);
+        env->regs[R_EDX] = ldul(&sc->edx);
+        env->regs[R_ECX] = ldul(&sc->ecx);
+        env->eip = ldul(&sc->eip);
 
         cpu_x86_load_seg(env, R_CS, lduw(&sc->cs) | 3);
         cpu_x86_load_seg(env, R_SS, lduw(&sc->ss) | 3);
 
 	{
 		unsigned int tmpflags;
-                tmpflags = ldl(&sc->eflags);
+                tmpflags = ldul(&sc->eflags);
 		env->eflags = (env->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
                 //		regs->orig_eax = -1;		/* disable syscall checks */
 	}
 
 	{
 		struct _fpstate * buf;
-                buf = (void *)ldl(&sc->fpstate);
+                buf = (void *)ldul(&sc->fpstate);
 		if (buf) {
 #if 0
 			if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
@@ -909,7 +909,7 @@ restore_sigcontext(CPUX86State *env, str
 		}
 	}
 
-        *peax = ldl(&sc->eax);
+        *peax = ldul(&sc->eax);
 	return err;
 #if 0
 badframe:
Index: linux-user/vm86.c
===================================================================
RCS file: /sources/qemu/qemu/linux-user/vm86.c,v
retrieving revision 1.11
diff -u -d -d -p -r1.11 vm86.c
--- linux-user/vm86.c	17 Sep 2007 08:09:50 -0000	1.11
+++ linux-user/vm86.c	16 Oct 2007 11:39:05 -0000
@@ -56,7 +56,7 @@ static inline unsigned int vm_getw(uint8
 
 static inline unsigned int vm_getl(uint8_t *segptr, unsigned int reg16)
 {
-    return ldl(segptr + (reg16 & 0xffff));
+    return ldul(segptr + (reg16 & 0xffff));
 }
 
 void save_v86_state(CPUX86State *env)
Index: target-alpha/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/exec.h,v
retrieving revision 1.4
diff -u -d -d -p -r1.4 exec.h
--- target-alpha/exec.h	14 Oct 2007 07:07:05 -0000	1.4
+++ target-alpha/exec.h	16 Oct 2007 11:39:05 -0000
@@ -62,6 +62,9 @@ register uint64_t T2 asm(AREG3);
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 static inline void env_to_regs(void)
Index: target-alpha/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/helper.c,v
retrieving revision 1.5
diff -u -d -d -p -r1.5 helper.c
--- target-alpha/helper.c	14 Oct 2007 07:07:05 -0000	1.5
+++ target-alpha/helper.c	16 Oct 2007 11:39:05 -0000
@@ -69,7 +69,7 @@ int cpu_alpha_handle_mmu_fault (CPUState
             env->exception_index = EXCP_DTB_MISS_PAL;
         else
             env->exception_index = EXCP_DTB_MISS_NATIVE;
-        opc = (ldl_code(env->pc) >> 21) << 4;
+        opc = (ldul_code(env->pc) >> 21) << 4;
         if (rw) {
             opc |= 0x9;
         } else {
@@ -108,7 +108,7 @@ int cpu_alpha_mfpr (CPUState *env, int i
         if (env->features & FEATURE_SPS)
             *valp = env->ipr[IPR_ESP];
         else
-            *valp = ldq_raw(hwpcb + 8);
+            *valp = lduq_raw(hwpcb + 8);
         break;
     case IPR_FEN:
         *valp = ((int64_t)(env->ipr[IPR_FEN] << 63)) >> 63;
@@ -127,7 +127,7 @@ int cpu_alpha_mfpr (CPUState *env, int i
             if (env->features & FEATURE_SPS)
                 *valp = env->ipr[IPR_KSP];
             else
-                *valp = ldq_raw(hwpcb + 0);
+                *valp = lduq_raw(hwpcb + 0);
         }
         break;
     case IPR_MCES:
@@ -159,7 +159,7 @@ int cpu_alpha_mfpr (CPUState *env, int i
         if (env->features & FEATURE_SPS)
             *valp = env->ipr[IPR_SSP];
         else
-            *valp = ldq_raw(hwpcb + 16);
+            *valp = lduq_raw(hwpcb + 16);
         break;
     case IPR_SYSPTBR:
         if (env->features & FEATURE_VIRBND)
@@ -200,7 +200,7 @@ int cpu_alpha_mfpr (CPUState *env, int i
         if (env->features & FEATURE_SPS)
             *valp = env->ipr[IPR_USP];
         else
-            *valp = ldq_raw(hwpcb + 24);
+            *valp = lduq_raw(hwpcb + 24);
         break;
     case IPR_VIRBND:
         if (env->features & FEATURE_VIRBND)
@@ -255,7 +255,7 @@ int cpu_alpha_mtpr (CPUState *env, int i
     case IPR_DATFX:
         env->ipr[IPR_DATFX] &= ~0x1;
         env->ipr[IPR_DATFX] |= val & 1;
-        tmp64 = ldq_raw(hwpcb + 56);
+        tmp64 = lduq_raw(hwpcb + 56);
         tmp64 &= ~0x8000000000000000ULL;
         tmp64 |= (val & 1) << 63;
         stq_raw(hwpcb + 56, tmp64);
@@ -268,7 +268,7 @@ int cpu_alpha_mtpr (CPUState *env, int i
         break;
     case IPR_FEN:
         env->ipr[IPR_FEN] = val & 1;
-        tmp64 = ldq_raw(hwpcb + 56);
+        tmp64 = lduq_raw(hwpcb + 56);
         tmp64 &= ~1;
         tmp64 |= val & 1;
         stq_raw(hwpcb + 56, tmp64);
@@ -438,7 +438,7 @@ void cpu_dump_state (CPUState *env, FILE
                 *((uint64_t *)(&env->ft0)), *((uint64_t *)(&env->ft1)),
                 *((uint64_t *)(&env->ft2)));
     cpu_fprintf(f, "\nMEM " TARGET_FMT_lx " %d %d\n",
-                ldq_raw(0x000000004007df60ULL),
+                lduq_raw(0x000000004007df60ULL),
                 (uint8_t *)(&env->ft0), (uint8_t *)(&env->fir[0]));
 }
 
Index: target-alpha/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/op_helper.c,v
retrieving revision 1.4
diff -u -d -d -p -r1.4 op_helper.c
--- target-alpha/op_helper.c	14 Oct 2007 08:18:12 -0000	1.4
+++ target-alpha/op_helper.c	16 Oct 2007 11:39:05 -0000
@@ -1213,6 +1213,21 @@ void helper_st_phys_to_virt (void)
 
 #define MMUSUFFIX _mmu
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -1224,6 +1239,7 @@ void helper_st_phys_to_virt (void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-alpha/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/op_mem.h,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 op_mem.h
--- target-alpha/op_mem.h	16 Sep 2007 21:08:01 -0000	1.2
+++ target-alpha/op_mem.h	16 Oct 2007 11:39:05 -0000
@@ -30,14 +30,14 @@ static inline uint32_t glue(ldl_l, MEMSU
 {
     env->lock = EA;
 
-    return glue(ldl, MEMSUFFIX)(EA);
+    return glue(ldul, MEMSUFFIX)(EA);
 }
 
 static inline uint32_t glue(ldq_l, MEMSUFFIX) (target_ulong EA)
 {
     env->lock = EA;
 
-    return glue(ldq, MEMSUFFIX)(EA);
+    return glue(lduq, MEMSUFFIX)(EA);
 }
 
 static inline void glue(stl_c, MEMSUFFIX) (target_ulong EA, uint32_t data)
@@ -82,12 +82,12 @@ ALPHA_LD_OP(bu, ldub);
 ALPHA_ST_OP(b, stb);
 ALPHA_LD_OP(wu, lduw);
 ALPHA_ST_OP(w, stw);
-ALPHA_LD_OP(l, ldl);
+ALPHA_LD_OP(l, ldul);
 ALPHA_ST_OP(l, stl);
-ALPHA_LD_OP(q, ldq);
+ALPHA_LD_OP(q, lduq);
 ALPHA_ST_OP(q, stq);
 
-ALPHA_LD_OP(q_u, ldq);
+ALPHA_LD_OP(q_u, lduq);
 ALPHA_ST_OP(q_u, stq);
 
 ALPHA_LD_OP(l_l, ldl_l);
Index: target-alpha/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/translate.c,v
retrieving revision 1.6
diff -u -d -d -p -r1.6 translate.c
--- target-alpha/translate.c	14 Oct 2007 08:50:17 -0000	1.6
+++ target-alpha/translate.c	16 Oct 2007 11:39:05 -0000
@@ -2010,7 +2010,7 @@ int gen_intermediate_code_internal (CPUS
                     ctx.pc, ctx.mem_idx);
         }
 #endif
-        insn = ldl_code(ctx.pc);
+        insn = ldul_code(ctx.pc);
 #if defined ALPHA_DEBUG_DISAS
         insn_count++;
         if (logfile != NULL) {
Index: target-arm/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-arm/exec.h,v
retrieving revision 1.14
diff -u -d -d -p -r1.14 exec.h
--- target-arm/exec.h	14 Oct 2007 07:07:05 -0000	1.14
+++ target-arm/exec.h	16 Oct 2007 11:39:05 -0000
@@ -64,6 +64,9 @@ static inline int cpu_halted(CPUState *e
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 /* In op_helper.c */
Index: target-arm/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-arm/helper.c,v
retrieving revision 1.23
diff -u -d -d -p -r1.23 helper.c
--- target-arm/helper.c	14 Oct 2007 07:07:05 -0000	1.23
+++ target-arm/helper.c	16 Oct 2007 11:39:05 -0000
@@ -297,7 +297,7 @@ void do_interrupt(CPUARMState *env)
             if (env->thumb) {
                 mask = lduw_code(env->regs[15] - 2) & 0xff;
             } else {
-                mask = ldl_code(env->regs[15] - 4) & 0xffffff;
+                mask = ldul_code(env->regs[15] - 4) & 0xffffff;
             }
             /* Only intercept calls from privileged modes, to provide some
                semblance of security.  */
@@ -473,7 +473,7 @@ static int get_phys_addr(CPUState *env, 
         /* Pagetable walk.  */
         /* Lookup l1 descriptor.  */
         table = (env->cp15.c2_base & 0xffffc000) | ((address >> 18) & 0x3ffc);
-        desc = ldl_phys(table);
+        desc = ldul_phys(table);
         type = (desc & 3);
         domain = (env->cp15.c3 >> ((desc >> 4) & 0x1e)) & 3;
         if (type == 0) {
@@ -502,7 +502,7 @@ static int get_phys_addr(CPUState *env, 
                 /* Fine pagetable.  */
                 table = (desc & 0xfffff000) | ((address >> 8) & 0xffc);
             }
-            desc = ldl_phys(table);
+            desc = ldul_phys(table);
             switch (desc & 3) {
             case 0: /* Page translation fault.  */
                 code = 7;
Index: target-arm/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-arm/op_helper.c,v
retrieving revision 1.7
diff -u -d -d -p -r1.7 op_helper.c
--- target-arm/op_helper.c	14 Oct 2007 07:07:05 -0000	1.7
+++ target-arm/op_helper.c	16 Oct 2007 11:39:05 -0000
@@ -180,6 +180,21 @@ void do_vfp_get_fpscr(void)
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -191,6 +206,7 @@ void do_vfp_get_fpscr(void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-arm/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-arm/op_mem.h,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 op_mem.h
--- target-arm/op_mem.h	30 Apr 2007 02:02:16 -0000	1.2
+++ target-arm/op_mem.h	16 Oct 2007 11:39:05 -0000
@@ -1,18 +1,17 @@
 /* ARM memory operations.  */
 
-/* Load from address T1 into T0.  */
-#define MEM_LD_OP(name) \
+#define MEM_LD_OP(name, lname) \
 void OPPROTO glue(op_ld##name,MEMSUFFIX)(void) \
 { \
-    T0 = glue(ld##name,MEMSUFFIX)(T1); \
+    T0 = glue(ld##lname,MEMSUFFIX)(T1); \
     FORCE_RET(); \
 }
 
-MEM_LD_OP(ub)
-MEM_LD_OP(sb)
-MEM_LD_OP(uw)
-MEM_LD_OP(sw)
-MEM_LD_OP(l)
+MEM_LD_OP(ub,ub)
+MEM_LD_OP(sb,sb)
+MEM_LD_OP(uw,uw)
+MEM_LD_OP(sw,sw)
+MEM_LD_OP(l,ul)
 
 #undef MEM_LD_OP
 
@@ -45,7 +44,7 @@ void OPPROTO glue(op_swp##name,MEMSUFFIX
 }
 
 MEM_SWP_OP(b, ub)
-MEM_SWP_OP(l, l)
+MEM_SWP_OP(l, ul)
 
 #undef MEM_SWP_OP
 
@@ -82,8 +81,8 @@ void OPPROTO glue(op_iwmmxt_st##name,MEM
 
 MMX_MEM_OP(b, ub)
 MMX_MEM_OP(w, uw)
-MMX_MEM_OP(l, l)
-MMX_MEM_OP(q, q)
+MMX_MEM_OP(l, ul)
+MMX_MEM_OP(q, uq)
 
 #undef MMX_MEM_OP
 
Index: target-arm/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-arm/translate.c,v
retrieving revision 1.57
diff -u -d -d -p -r1.57 translate.c
--- target-arm/translate.c	17 Sep 2007 08:09:51 -0000	1.57
+++ target-arm/translate.c	16 Oct 2007 11:39:05 -0000
@@ -2206,7 +2206,7 @@ static void disas_arm_insn(CPUState * en
 {
     unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
 
-    insn = ldl_code(s->pc);
+    insn = ldul_code(s->pc);
     s->pc += 4;
 
     cond = insn >> 28;
Index: target-cris/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-cris/exec.h,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 exec.h
--- target-cris/exec.h	14 Oct 2007 07:07:06 -0000	1.2
+++ target-cris/exec.h	16 Oct 2007 11:39:06 -0000
@@ -50,6 +50,9 @@ void tlb_fill (target_ulong addr, int is
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 void cpu_cris_flush_flags(CPUCRISState *env, int cc_op);
Index: target-cris/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-cris/helper.c,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 helper.c
--- target-cris/helper.c	14 Oct 2007 07:07:06 -0000	1.2
+++ target-cris/helper.c	16 Oct 2007 11:39:06 -0000
@@ -106,7 +106,7 @@ void do_interrupt(CPUState *env)
 //			printf ("BREAK! %d\n", env->trapnr);
 			irqnum = env->trapnr;
 			ebp = env->pregs[SR_EBP];
-			isr = ldl_code(ebp + irqnum * 4);
+			isr = ldul_code(ebp + irqnum * 4);
 			env->pregs[SR_ERP] = env->pc + 2;
 			env->pc = isr;
 
@@ -117,7 +117,7 @@ void do_interrupt(CPUState *env)
 //			printf ("MMU miss\n");
 			irqnum = 4;
 			ebp = env->pregs[SR_EBP];
-			isr = ldl_code(ebp + irqnum * 4);
+			isr = ldul_code(ebp + irqnum * 4);
 			env->pregs[SR_ERP] = env->pc;
 			env->pc = isr;
 			cris_shift_ccs(env);
@@ -138,7 +138,7 @@ void do_interrupt(CPUState *env)
 					__builtin_clz(env->pending_interrupts);
 				irqnum += 0x30;
 				ebp = env->pregs[SR_EBP];
-				isr = ldl_code(ebp + irqnum * 4);
+				isr = ldul_code(ebp + irqnum * 4);
 				env->pregs[SR_ERP] = env->pc;
 				env->pc = isr;
 
Index: target-cris/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-cris/op_helper.c,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 op_helper.c
--- target-cris/op_helper.c	14 Oct 2007 07:07:06 -0000	1.2
+++ target-cris/op_helper.c	16 Oct 2007 11:39:06 -0000
@@ -25,6 +25,21 @@
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -36,6 +51,7 @@
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-cris/op_mem.c
===================================================================
RCS file: /sources/qemu/qemu/target-cris/op_mem.c,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 op_mem.c
--- target-cris/op_mem.c	8 Oct 2007 13:04:02 -0000	1.1
+++ target-cris/op_mem.c	16 Oct 2007 11:39:06 -0000
@@ -49,7 +49,7 @@ void glue(op_stw_T0_T1, MEMSUFFIX) (void
 }
 
 void glue(op_ldl_T0_T0, MEMSUFFIX) (void) {
-    T0 = glue(ldl, MEMSUFFIX) (T0);
+    T0 = glue(ldul, MEMSUFFIX) (T0);
     RETURN();
 }
 
Index: target-cris/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-cris/translate.c,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 translate.c
--- target-cris/translate.c	8 Oct 2007 12:49:08 -0000	1.1
+++ target-cris/translate.c	16 Oct 2007 11:39:06 -0000
@@ -828,7 +828,7 @@ static int dec_prep_alu_m(DisasContext *
 		if (memsize == 1)
 			insn_len++;
 
-		imm = ldl_code(dc->pc + 2);
+		imm = ldul_code(dc->pc + 2);
 		if (memsize != 4) {
 			if (s_ext) {
 				imm = sign_extend(imm, (memsize * 8) - 1);
@@ -1962,7 +1962,7 @@ static unsigned int dec_lapc_im(DisasCon
 	rd = dc->op2;
 
 	cris_cc_mask(dc, 0);
-	imm = ldl_code(dc->pc + 2);
+	imm = ldul_code(dc->pc + 2);
 	DIS(fprintf (logfile, "lapc 0x%x, $r%u\n", imm + dc->pc, dc->op2));
 	gen_op_movl_T0_im (dc->pc + imm);
 	gen_movl_reg_T0[rd] ();
@@ -1999,7 +1999,7 @@ static unsigned int dec_jas_im(DisasCont
 {
 	uint32_t imm;
 
-	imm = ldl_code(dc->pc + 2);
+	imm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "jas 0x%x\n", imm));
 	cris_cc_mask(dc, 0);
@@ -2016,7 +2016,7 @@ static unsigned int dec_jasc_im(DisasCon
 {
 	uint32_t imm;
 
-	imm = ldl_code(dc->pc + 2);
+	imm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "jasc 0x%x\n", imm));
 	cris_cc_mask(dc, 0);
@@ -2047,7 +2047,7 @@ static unsigned int dec_bcc_im(DisasCont
 	int32_t offset;
 	uint32_t cond = dc->op2;
 
-	offset = ldl_code(dc->pc + 2);
+	offset = ldul_code(dc->pc + 2);
 	offset = sign_extend(offset, 15);
 
 	DIS(fprintf (logfile, "b%s %d pc=%x dst=%x\n",
@@ -2065,7 +2065,7 @@ static unsigned int dec_bas_im(DisasCont
 	int32_t simm;
 
 
-	simm = ldl_code(dc->pc + 2);
+	simm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "bas 0x%x, $p%u\n", dc->pc + simm, dc->op2));
 	cris_cc_mask(dc, 0);
@@ -2081,7 +2081,7 @@ static unsigned int dec_bas_im(DisasCont
 static unsigned int dec_basc_im(DisasContext *dc)
 {
 	int32_t simm;
-	simm = ldl_code(dc->pc + 2);
+	simm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "basc 0x%x, $p%u\n", dc->pc + simm, dc->op2));
 	cris_cc_mask(dc, 0);
@@ -2259,7 +2259,7 @@ cris_decoder(DisasContext *dc)
 	int i;
 
 	/* Load a halfword onto the instruction register.  */
-	tmp = ldl_code(dc->pc);
+	tmp = ldul_code(dc->pc);
 	dc->ir = tmp & 0xffff;
 
 	/* Now decode it.  */
Index: target-i386/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-i386/exec.h,v
retrieving revision 1.38
diff -u -d -d -p -r1.38 exec.h
--- target-i386/exec.h	14 Oct 2007 07:07:06 -0000	1.38
+++ target-i386/exec.h	16 Oct 2007 11:39:06 -0000
@@ -217,6 +217,9 @@ void check_iol_DX(void);
 #if !defined(CONFIG_USER_ONLY)
 
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 
 static inline double ldfq(target_ulong ptr)
 {
@@ -224,7 +227,7 @@ static inline double ldfq(target_ulong p
         double d;
         uint64_t i;
     } u;
-    u.i = ldq(ptr);
+    u.i = lduq(ptr);
     return u.d;
 }
 
@@ -244,7 +247,7 @@ static inline float ldfl(target_ulong pt
         float f;
         uint32_t i;
     } u;
-    u.i = ldl(ptr);
+    u.i = ldul(ptr);
     return u.f;
 }
 
@@ -388,7 +391,7 @@ static inline CPU86_LDouble helper_fldt(
     /* XXX: handle overflow ? */
     e = (upper & 0x7fff) - 16383 + EXPBIAS; /* exponent */
     e |= (upper >> 4) & 0x800; /* sign */
-    ll = (ldq(ptr) >> 11) & ((1LL << 52) - 1);
+    ll = (lduq(ptr) >> 11) & ((1LL << 52) - 1);
 #ifdef __arm__
     temp.l.upper = (e << 20) | (ll >> 32);
     temp.l.lower = ll;
@@ -419,12 +422,12 @@ static inline void helper_fstt(CPU86_LDo
 
 static inline CPU86_LDouble helper_fldt(target_ulong ptr)
 {
-    return *(CPU86_LDouble *)ptr;
+    return *(CPU86_LDouble *)(unsigned long)ptr;
 }
 
 static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr)
 {
-    *(CPU86_LDouble *)ptr = f;
+    *(CPU86_LDouble *)(unsigned long)ptr = f;
 }
 
 #else
@@ -435,7 +438,7 @@ static inline CPU86_LDouble helper_fldt(
 {
     CPU86_LDoubleU temp;
 
-    temp.l.lower = ldq(ptr);
+    temp.l.lower = lduq(ptr);
     temp.l.upper = lduw(ptr + 8);
     return temp.d;
 }
Index: target-i386/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/helper.c,v
retrieving revision 1.90
diff -u -d -d -p -r1.90 helper.c
--- target-i386/helper.c	14 Oct 2007 07:07:06 -0000	1.90
+++ target-i386/helper.c	16 Oct 2007 11:39:06 -0000
@@ -122,8 +122,8 @@ static inline int load_segment(uint32_t 
     if ((index + 7) > dt->limit)
         return -1;
     ptr = dt->base + index;
-    *e1_ptr = ldl_kernel(ptr);
-    *e2_ptr = ldl_kernel(ptr + 4);
+    *e1_ptr = ldul_kernel(ptr);
+    *e2_ptr = ldul_kernel(ptr + 4);
     return 0;
 }
 
@@ -186,7 +186,7 @@ static inline void get_ss_esp_from_tss(u
         *esp_ptr = lduw_kernel(env->tr.base + index);
         *ss_ptr = lduw_kernel(env->tr.base + index + 2);
     } else {
-        *esp_ptr = ldl_kernel(env->tr.base + index);
+        *esp_ptr = ldul_kernel(env->tr.base + index);
         *ss_ptr = lduw_kernel(env->tr.base + index + 4);
     }
 }
@@ -302,15 +302,15 @@ static void switch_tss(int tss_selector,
     /* read all the registers from the new TSS */
     if (type & 8) {
         /* 32 bit */
-        new_cr3 = ldl_kernel(tss_base + 0x1c);
-        new_eip = ldl_kernel(tss_base + 0x20);
-        new_eflags = ldl_kernel(tss_base + 0x24);
+        new_cr3 = ldul_kernel(tss_base + 0x1c);
+        new_eip = ldul_kernel(tss_base + 0x20);
+        new_eflags = ldul_kernel(tss_base + 0x24);
         for(i = 0; i < 8; i++)
-            new_regs[i] = ldl_kernel(tss_base + (0x28 + i * 4));
+            new_regs[i] = ldul_kernel(tss_base + (0x28 + i * 4));
         for(i = 0; i < 6; i++)
             new_segs[i] = lduw_kernel(tss_base + (0x48 + i * 4));
         new_ldt = lduw_kernel(tss_base + 0x60);
-        new_trap = ldl_kernel(tss_base + 0x64);
+        new_trap = ldul_kernel(tss_base + 0x64);
     } else {
         /* 16 bit */
         new_cr3 = 0;
@@ -341,7 +341,7 @@ static void switch_tss(int tss_selector,
         target_ulong ptr;
         uint32_t e2;
         ptr = env->gdt.base + (env->tr.selector & ~7);
-        e2 = ldl_kernel(ptr + 4);
+        e2 = ldul_kernel(ptr + 4);
         e2 &= ~DESC_TSS_BUSY_MASK;
         stl_kernel(ptr + 4, e2);
     }
@@ -393,7 +393,7 @@ static void switch_tss(int tss_selector,
         target_ulong ptr;
         uint32_t e2;
         ptr = env->gdt.base + (tss_selector & ~7);
-        e2 = ldl_kernel(ptr + 4);
+        e2 = ldul_kernel(ptr + 4);
         e2 |= DESC_TSS_BUSY_MASK;
         stl_kernel(ptr + 4, e2);
     }
@@ -456,8 +456,8 @@ static void switch_tss(int tss_selector,
         if ((index + 7) > dt->limit)
             raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
         if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
             raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
         if (!(e2 & DESC_P_MASK))
@@ -580,7 +580,7 @@ do {\
 
 #define POPL(ssp, sp, sp_mask, val)\
 {\
-    val = (uint32_t)ldl_kernel((ssp) + (sp & (sp_mask)));\
+    val = (uint32_t)ldul_kernel((ssp) + (sp & (sp_mask)));\
     sp += 4;\
 }
 
@@ -629,8 +629,8 @@ static void do_interrupt_protected(int i
     if (intno * 8 + 7 > dt->limit)
         raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
     ptr = dt->base + intno * 8;
-    e1 = ldl_kernel(ptr);
-    e2 = ldl_kernel(ptr + 4);
+    e1 = ldul_kernel(ptr);
+    e2 = ldul_kernel(ptr + 4);
     /* check gate type */
     type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
     switch(type) {
@@ -810,7 +810,7 @@ static void do_interrupt_protected(int i
 
 #define POPQ(sp, val)\
 {\
-    val = ldq_kernel(sp);\
+    val = lduq_kernel(sp);\
     sp += 8;\
 }
 
@@ -828,7 +828,7 @@ static inline target_ulong get_rsp_from_
     index = 8 * level + 4;
     if ((index + 7) > env->tr.limit)
         raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc);
-    return ldq_kernel(env->tr.base + index);
+    return lduq_kernel(env->tr.base + index);
 }
 
 /* 64 bit interrupt */
@@ -875,9 +875,9 @@ static void do_interrupt64(int intno, in
     if (intno * 16 + 15 > dt->limit)
         raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
     ptr = dt->base + intno * 16;
-    e1 = ldl_kernel(ptr);
-    e2 = ldl_kernel(ptr + 4);
-    e3 = ldl_kernel(ptr + 8);
+    e1 = ldul_kernel(ptr);
+    e2 = ldul_kernel(ptr + 4);
+    e3 = ldul_kernel(ptr + 8);
     /* check gate type */
     type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
     switch(type) {
@@ -1147,7 +1147,7 @@ void do_interrupt_user(int intno, int is
 
     dt = &env->idt;
     ptr = dt->base + (intno * 8);
-    e2 = ldl_kernel(ptr + 4);
+    e2 = ldul_kernel(ptr + 4);
 
     dpl = (e2 >> DESC_DPL_SHIFT) & 3;
     cpl = env->hflags & HF_CPL_MASK;
@@ -1458,7 +1458,7 @@ void helper_rsm(void)
 
     sm_state = env->smbase + 0x8000;
 #ifdef TARGET_X86_64
-    env->efer = ldq_phys(sm_state + 0x7ed0);
+    env->efer = lduq_phys(sm_state + 0x7ed0);
     if (env->efer & MSR_EFER_LMA)
         env->hflags |= HF_LMA_MASK;
     else
@@ -1468,83 +1468,83 @@ void helper_rsm(void)
         offset = 0x7e00 + i * 16;
         cpu_x86_load_seg_cache(env, i,
                                lduw_phys(sm_state + offset),
-                               ldq_phys(sm_state + offset + 8),
-                               ldl_phys(sm_state + offset + 4),
+                               lduq_phys(sm_state + offset + 8),
+                               ldul_phys(sm_state + offset + 4),
                                (lduw_phys(sm_state + offset + 2) & 0xf0ff) << 8);
     }
 
-    env->gdt.base = ldq_phys(sm_state + 0x7e68);
-    env->gdt.limit = ldl_phys(sm_state + 0x7e64);
+    env->gdt.base = lduq_phys(sm_state + 0x7e68);
+    env->gdt.limit = ldul_phys(sm_state + 0x7e64);
 
     env->ldt.selector = lduw_phys(sm_state + 0x7e70);
-    env->ldt.base = ldq_phys(sm_state + 0x7e78);
-    env->ldt.limit = ldl_phys(sm_state + 0x7e74);
+    env->ldt.base = lduq_phys(sm_state + 0x7e78);
+    env->ldt.limit = ldul_phys(sm_state + 0x7e74);
     env->ldt.flags = (lduw_phys(sm_state + 0x7e72) & 0xf0ff) << 8;
 
-    env->idt.base = ldq_phys(sm_state + 0x7e88);
-    env->idt.limit = ldl_phys(sm_state + 0x7e84);
+    env->idt.base = lduq_phys(sm_state + 0x7e88);
+    env->idt.limit = ldul_phys(sm_state + 0x7e84);
 
     env->tr.selector = lduw_phys(sm_state + 0x7e90);
-    env->tr.base = ldq_phys(sm_state + 0x7e98);
-    env->tr.limit = ldl_phys(sm_state + 0x7e94);
+    env->tr.base = lduq_phys(sm_state + 0x7e98);
+    env->tr.limit = ldul_phys(sm_state + 0x7e94);
     env->tr.flags = (lduw_phys(sm_state + 0x7e92) & 0xf0ff) << 8;
 
-    EAX = ldq_phys(sm_state + 0x7ff8);
-    ECX = ldq_phys(sm_state + 0x7ff0);
-    EDX = ldq_phys(sm_state + 0x7fe8);
-    EBX = ldq_phys(sm_state + 0x7fe0);
-    ESP = ldq_phys(sm_state + 0x7fd8);
-    EBP = ldq_phys(sm_state + 0x7fd0);
-    ESI = ldq_phys(sm_state + 0x7fc8);
-    EDI = ldq_phys(sm_state + 0x7fc0);
+    EAX = lduq_phys(sm_state + 0x7ff8);
+    ECX = lduq_phys(sm_state + 0x7ff0);
+    EDX = lduq_phys(sm_state + 0x7fe8);
+    EBX = lduq_phys(sm_state + 0x7fe0);
+    ESP = lduq_phys(sm_state + 0x7fd8);
+    EBP = lduq_phys(sm_state + 0x7fd0);
+    ESI = lduq_phys(sm_state + 0x7fc8);
+    EDI = lduq_phys(sm_state + 0x7fc0);
     for(i = 8; i < 16; i++)
-        env->regs[i] = ldq_phys(sm_state + 0x7ff8 - i * 8);
-    env->eip = ldq_phys(sm_state + 0x7f78);
-    load_eflags(ldl_phys(sm_state + 0x7f70),
+        env->regs[i] = lduq_phys(sm_state + 0x7ff8 - i * 8);
+    env->eip = lduq_phys(sm_state + 0x7f78);
+    load_eflags(ldul_phys(sm_state + 0x7f70),
                 ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    env->dr[6] = ldl_phys(sm_state + 0x7f68);
-    env->dr[7] = ldl_phys(sm_state + 0x7f60);
+    env->dr[6] = ldul_phys(sm_state + 0x7f68);
+    env->dr[7] = ldul_phys(sm_state + 0x7f60);
 
-    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f48));
-    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7f50));
-    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7f58));
+    cpu_x86_update_cr4(env, ldul_phys(sm_state + 0x7f48));
+    cpu_x86_update_cr3(env, ldul_phys(sm_state + 0x7f50));
+    cpu_x86_update_cr0(env, ldul_phys(sm_state + 0x7f58));
 
-    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+    val = ldul_phys(sm_state + 0x7efc); /* revision ID */
     if (val & 0x20000) {
-        env->smbase = ldl_phys(sm_state + 0x7f00) & ~0x7fff;
+        env->smbase = ldul_phys(sm_state + 0x7f00) & ~0x7fff;
     }
 #else
-    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7ffc));
-    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7ff8));
-    load_eflags(ldl_phys(sm_state + 0x7ff4),
+    cpu_x86_update_cr0(env, ldul_phys(sm_state + 0x7ffc));
+    cpu_x86_update_cr3(env, ldul_phys(sm_state + 0x7ff8));
+    load_eflags(ldul_phys(sm_state + 0x7ff4),
                 ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    env->eip = ldl_phys(sm_state + 0x7ff0);
-    EDI = ldl_phys(sm_state + 0x7fec);
-    ESI = ldl_phys(sm_state + 0x7fe8);
-    EBP = ldl_phys(sm_state + 0x7fe4);
-    ESP = ldl_phys(sm_state + 0x7fe0);
-    EBX = ldl_phys(sm_state + 0x7fdc);
-    EDX = ldl_phys(sm_state + 0x7fd8);
-    ECX = ldl_phys(sm_state + 0x7fd4);
-    EAX = ldl_phys(sm_state + 0x7fd0);
-    env->dr[6] = ldl_phys(sm_state + 0x7fcc);
-    env->dr[7] = ldl_phys(sm_state + 0x7fc8);
+    env->eip = ldul_phys(sm_state + 0x7ff0);
+    EDI = ldul_phys(sm_state + 0x7fec);
+    ESI = ldul_phys(sm_state + 0x7fe8);
+    EBP = ldul_phys(sm_state + 0x7fe4);
+    ESP = ldul_phys(sm_state + 0x7fe0);
+    EBX = ldul_phys(sm_state + 0x7fdc);
+    EDX = ldul_phys(sm_state + 0x7fd8);
+    ECX = ldul_phys(sm_state + 0x7fd4);
+    EAX = ldul_phys(sm_state + 0x7fd0);
+    env->dr[6] = ldul_phys(sm_state + 0x7fcc);
+    env->dr[7] = ldul_phys(sm_state + 0x7fc8);
 
-    env->tr.selector = ldl_phys(sm_state + 0x7fc4) & 0xffff;
-    env->tr.base = ldl_phys(sm_state + 0x7f64);
-    env->tr.limit = ldl_phys(sm_state + 0x7f60);
-    env->tr.flags = (ldl_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
+    env->tr.selector = ldul_phys(sm_state + 0x7fc4) & 0xffff;
+    env->tr.base = ldul_phys(sm_state + 0x7f64);
+    env->tr.limit = ldul_phys(sm_state + 0x7f60);
+    env->tr.flags = (ldul_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
 
-    env->ldt.selector = ldl_phys(sm_state + 0x7fc0) & 0xffff;
-    env->ldt.base = ldl_phys(sm_state + 0x7f80);
-    env->ldt.limit = ldl_phys(sm_state + 0x7f7c);
-    env->ldt.flags = (ldl_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
+    env->ldt.selector = ldul_phys(sm_state + 0x7fc0) & 0xffff;
+    env->ldt.base = ldul_phys(sm_state + 0x7f80);
+    env->ldt.limit = ldul_phys(sm_state + 0x7f7c);
+    env->ldt.flags = (ldul_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
 
-    env->gdt.base = ldl_phys(sm_state + 0x7f74);
-    env->gdt.limit = ldl_phys(sm_state + 0x7f70);
+    env->gdt.base = ldul_phys(sm_state + 0x7f74);
+    env->gdt.limit = ldul_phys(sm_state + 0x7f70);
 
-    env->idt.base = ldl_phys(sm_state + 0x7f58);
-    env->idt.limit = ldl_phys(sm_state + 0x7f54);
+    env->idt.base = ldul_phys(sm_state + 0x7f58);
+    env->idt.limit = ldul_phys(sm_state + 0x7f54);
 
     for(i = 0; i < 6; i++) {
         if (i < 3)
@@ -1552,16 +1552,16 @@ void helper_rsm(void)
         else
             offset = 0x7f2c + (i - 3) * 12;
         cpu_x86_load_seg_cache(env, i,
-                               ldl_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
-                               ldl_phys(sm_state + offset + 8),
-                               ldl_phys(sm_state + offset + 4),
-                               (ldl_phys(sm_state + offset) & 0xf0ff) << 8);
+                               ldul_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
+                               ldul_phys(sm_state + offset + 8),
+                               ldul_phys(sm_state + offset + 4),
+                               (ldul_phys(sm_state + offset) & 0xf0ff) << 8);
     }
-    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f14));
+    cpu_x86_update_cr4(env, ldul_phys(sm_state + 0x7f14));
 
-    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+    val = ldul_phys(sm_state + 0x7efc); /* revision ID */
     if (val & 0x20000) {
-        env->smbase = ldl_phys(sm_state + 0x7ef8) & ~0x7fff;
+        env->smbase = ldul_phys(sm_state + 0x7ef8) & ~0x7fff;
     }
 #endif
     CC_OP = CC_OP_EFLAGS;
@@ -1643,7 +1643,7 @@ void helper_cmpxchg8b(void)
     int eflags;
 
     eflags = cc_table[CC_OP].compute_all();
-    d = ldq(A0);
+    d = lduq(A0);
     if (d == (((uint64_t)EDX << 32) | EAX)) {
         stq(A0, ((uint64_t)ECX << 32) | EBX);
         eflags |= CC_Z;
@@ -1761,7 +1761,7 @@ void helper_enter_level(int level, int d
         while (--level) {
             esp -= 4;
             ebp -= 4;
-            stl(ssp + (esp & esp_mask), ldl(ssp + (ebp & esp_mask)));
+            stl(ssp + (esp & esp_mask), ldul(ssp + (ebp & esp_mask)));
         }
         esp -= 4;
         stl(ssp + (esp & esp_mask), T1);
@@ -1791,7 +1791,7 @@ void helper_enter64_level(int level, int
         while (--level) {
             esp -= 8;
             ebp -= 8;
-            stq(esp, ldq(ebp));
+            stq(esp, lduq(ebp));
         }
         esp -= 8;
         stq(esp, T1);
@@ -1836,8 +1836,8 @@ void helper_lldt_T0(void)
         if ((index + entry_limit) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
         if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         if (!(e2 & DESC_P_MASK))
@@ -1845,7 +1845,7 @@ void helper_lldt_T0(void)
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
             uint32_t e3;
-            e3 = ldl_kernel(ptr + 8);
+            e3 = ldul_kernel(ptr + 8);
             load_seg_cache_raw_dt(&env->ldt, e1, e2);
             env->ldt.base |= (target_ulong)e3 << 32;
         } else
@@ -1885,8 +1885,8 @@ void helper_ltr_T0(void)
         if ((index + entry_limit) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
         type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
         if ((e2 & DESC_S_MASK) ||
             (type != 1 && type != 9))
@@ -1896,8 +1896,8 @@ void helper_ltr_T0(void)
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
             uint32_t e3, e4;
-            e3 = ldl_kernel(ptr + 8);
-            e4 = ldl_kernel(ptr + 12);
+            e3 = ldul_kernel(ptr + 8);
+            e4 = ldul_kernel(ptr + 12);
             if ((e4 >> DESC_TYPE_SHIFT) & 0xf)
                 raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
             load_seg_cache_raw_dt(&env->tr, e1, e2);
@@ -1943,8 +1943,8 @@ void load_seg(int seg_reg, int selector)
         if ((index + 7) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
 
         if (!(e2 & DESC_S_MASK))
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
@@ -2273,7 +2273,7 @@ void helper_lcall_protected_T0_T1(int sh
                 PUSHL(ssp, sp, sp_mask, env->segs[R_SS].selector);
                 PUSHL(ssp, sp, sp_mask, ESP);
                 for(i = param_count - 1; i >= 0; i--) {
-                    val = ldl_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask));
+                    val = ldul_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask));
                     PUSHL(ssp, sp, sp_mask, val);
                 }
             } else {
@@ -3569,13 +3569,13 @@ void helper_fxrstor(target_ulong ptr, in
 
     if (env->cr[4] & CR4_OSFXSR_MASK) {
         /* XXX: finish it */
-        env->mxcsr = ldl(ptr + 0x18);
-        //ldl(ptr + 0x1c);
+        env->mxcsr = ldul(ptr + 0x18);
+        //ldul(ptr + 0x1c);
         nb_xmm_regs = 8 << data64;
         addr = ptr + 0xa0;
         for(i = 0; i < nb_xmm_regs; i++) {
-            env->xmm_regs[i].XMM_Q(0) = ldq(addr);
-            env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8);
+            env->xmm_regs[i].XMM_Q(0) = lduq(addr);
+            env->xmm_regs[i].XMM_Q(1) = lduq(addr + 8);
             addr += 16;
         }
     }
@@ -3867,6 +3867,21 @@ void update_fp_status(void)
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -3878,6 +3893,7 @@ void update_fp_status(void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 #endif
 
@@ -4005,27 +4021,27 @@ void helper_vmrun(target_ulong addr)
        vmcb in svm mode */
     /* We shift all the intercept bits so we can OR them with the TB
        flags later on */
-    env->intercept            = (ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept)) << INTERCEPT_INTR) | INTERCEPT_SVM_MASK;
+    env->intercept            = (lduq_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept)) << INTERCEPT_INTR) | INTERCEPT_SVM_MASK;
     env->intercept_cr_read    = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_read));
     env->intercept_cr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_write));
     env->intercept_dr_read    = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_read));
     env->intercept_dr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_write));
-    env->intercept_exceptions = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions));
+    env->intercept_exceptions = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions));
 
-    env->gdt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base));
-    env->gdt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit));
+    env->gdt.base  = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base));
+    env->gdt.limit = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit));
 
-    env->idt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base));
-    env->idt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit));
+    env->idt.base  = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base));
+    env->idt.limit = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit));
 
     /* clear exit_info_2 so we behave like the real hardware */
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 0);
 
-    cpu_x86_update_cr0(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr0)));
-    cpu_x86_update_cr4(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4)));
-    cpu_x86_update_cr3(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3)));
-    env->cr[2] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2));
-    int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
+    cpu_x86_update_cr0(env, lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr0)));
+    cpu_x86_update_cr4(env, lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4)));
+    cpu_x86_update_cr3(env, lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3)));
+    env->cr[2] = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2));
+    int_ctl = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
     if (int_ctl & V_INTR_MASKING_MASK) {
         env->cr[8] = int_ctl & V_TPR_MASK;
         if (env->eflags & IF_MASK)
@@ -4033,13 +4049,13 @@ void helper_vmrun(target_ulong addr)
     }
 
 #ifdef TARGET_X86_64
-    env->efer = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer));
+    env->efer = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer));
     env->hflags &= ~HF_LMA_MASK;
     if (env->efer & MSR_EFER_LMA)
        env->hflags |= HF_LMA_MASK;
 #endif
     env->eflags = 0;
-    load_eflags(ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags)),
+    load_eflags(lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags)),
                 ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
     CC_OP = CC_OP_EFLAGS;
     CC_DST = 0xffffffff;
@@ -4049,12 +4065,12 @@ void helper_vmrun(target_ulong addr)
     SVM_LOAD_SEG(env->vm_vmcb, SS, ss);
     SVM_LOAD_SEG(env->vm_vmcb, DS, ds);
 
-    EIP = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rip));
+    EIP = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rip));
     env->eip = EIP;
-    ESP = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rsp));
-    EAX = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rax));
-    env->dr[7] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr7));
-    env->dr[6] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr6));
+    ESP = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rsp));
+    EAX = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rax));
+    env->dr[7] = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr7));
+    env->dr[6] = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr6));
     cpu_x86_set_cpl(env, ldub_phys(env->vm_vmcb + offsetof(struct vmcb, save.cpl)));
 
     /* FIXME: guest state consistency checks */
@@ -4073,11 +4089,11 @@ void helper_vmrun(target_ulong addr)
     regs_to_env();
 
     /* maybe we need to inject an event */
-    event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+    event_inj = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
     if (event_inj & SVM_EVTINJ_VALID) {
         uint8_t vector = event_inj & SVM_EVTINJ_VEC_MASK;
         uint16_t valid_err = event_inj & SVM_EVTINJ_VALID_ERR;
-        uint32_t event_inj_err = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
+        uint32_t event_inj_err = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
         stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
 
         if (loglevel & CPU_LOG_TB_IN_ASM)
@@ -4137,7 +4153,7 @@ void helper_vmload(target_ulong addr)
 {
     if (loglevel & CPU_LOG_TB_IN_ASM)
         fprintf(logfile,"vmload! " TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n",
-                addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)),
+                addr, lduq_phys(addr + offsetof(struct vmcb, save.fs.base)),
                 env->segs[R_FS].base);
 
     SVM_LOAD_SEG2(addr, segs[R_FS], fs);
@@ -4146,22 +4162,22 @@ void helper_vmload(target_ulong addr)
     SVM_LOAD_SEG2(addr, ldt, ldtr);
 
 #ifdef TARGET_X86_64
-    env->kernelgsbase = ldq_phys(addr + offsetof(struct vmcb, save.kernel_gs_base));
-    env->lstar = ldq_phys(addr + offsetof(struct vmcb, save.lstar));
-    env->cstar = ldq_phys(addr + offsetof(struct vmcb, save.cstar));
-    env->fmask = ldq_phys(addr + offsetof(struct vmcb, save.sfmask));
+    env->kernelgsbase = lduq_phys(addr + offsetof(struct vmcb, save.kernel_gs_base));
+    env->lstar = lduq_phys(addr + offsetof(struct vmcb, save.lstar));
+    env->cstar = lduq_phys(addr + offsetof(struct vmcb, save.cstar));
+    env->fmask = lduq_phys(addr + offsetof(struct vmcb, save.sfmask));
 #endif
-    env->star = ldq_phys(addr + offsetof(struct vmcb, save.star));
-    env->sysenter_cs = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_cs));
-    env->sysenter_esp = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_esp));
-    env->sysenter_eip = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_eip));
+    env->star = lduq_phys(addr + offsetof(struct vmcb, save.star));
+    env->sysenter_cs = lduq_phys(addr + offsetof(struct vmcb, save.sysenter_cs));
+    env->sysenter_esp = lduq_phys(addr + offsetof(struct vmcb, save.sysenter_esp));
+    env->sysenter_eip = lduq_phys(addr + offsetof(struct vmcb, save.sysenter_eip));
 }
 
 void helper_vmsave(target_ulong addr)
 {
     if (loglevel & CPU_LOG_TB_IN_ASM)
         fprintf(logfile,"vmsave! " TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n",
-                addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)),
+                addr, lduq_phys(addr + offsetof(struct vmcb, save.fs.base)),
                 env->segs[R_FS].base);
 
     SVM_SAVE_SEG(addr, segs[R_FS], fs);
@@ -4228,7 +4244,7 @@ int svm_check_intercept_param(uint32_t t
     case SVM_EXIT_IOIO:
         if (INTERCEPTED(1ULL << INTERCEPT_IOIO_PROT)) {
             /* FIXME: this should be read in at vmrun (faster this way?) */
-            uint64_t addr = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.iopm_base_pa));
+            uint64_t addr = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, control.iopm_base_pa));
             uint16_t port = (uint16_t) (param >> 16);
 
             if(ldub_phys(addr + port / 8) & (1 << (port % 8)))
@@ -4239,7 +4255,7 @@ int svm_check_intercept_param(uint32_t t
     case SVM_EXIT_MSR:
         if (INTERCEPTED(1ULL << INTERCEPT_MSR_PROT)) {
             /* FIXME: this should be read in at vmrun (faster this way?) */
-            uint64_t addr = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.msrpm_base_pa));
+            uint64_t addr = lduq_phys(env->vm_vmcb + offsetof(struct vmcb, control.msrpm_base_pa));
             switch((uint32_t)ECX) {
             case 0 ... 0x1fff:
                 T0 = (ECX * 2) % 8;
@@ -4281,7 +4297,7 @@ void vmexit(uint64_t exit_code, uint64_t
     if (loglevel & CPU_LOG_TB_IN_ASM)
         fprintf(logfile,"vmexit(%016" PRIx64 ", %016" PRIx64 ", %016" PRIx64 ", " TARGET_FMT_lx ")!\n",
                 exit_code, exit_info_1,
-                ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2)),
+                lduq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2)),
                 EIP);
 
     if(env->hflags & HF_INHIBIT_IRQ_MASK) {
@@ -4309,7 +4325,7 @@ void vmexit(uint64_t exit_code, uint64_t
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3), env->cr[3]);
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4), env->cr[4]);
 
-    if ((int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl))) & V_INTR_MASKING_MASK) {
+    if ((int_ctl = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl))) & V_INTR_MASKING_MASK) {
         int_ctl &= ~V_TPR_MASK;
         int_ctl |= env->cr[8] & V_TPR_MASK;
         stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl), int_ctl);
@@ -4329,27 +4345,27 @@ void vmexit(uint64_t exit_code, uint64_t
     env->intercept_exceptions = 0;
     env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
 
-    env->gdt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base));
-    env->gdt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit));
+    env->gdt.base  = lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base));
+    env->gdt.limit = ldul_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit));
 
-    env->idt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base));
-    env->idt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit));
+    env->idt.base  = lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base));
+    env->idt.limit = ldul_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit));
 
-    cpu_x86_update_cr0(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0)) | CR0_PE_MASK);
-    cpu_x86_update_cr4(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4)));
-    cpu_x86_update_cr3(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3)));
+    cpu_x86_update_cr0(env, lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0)) | CR0_PE_MASK);
+    cpu_x86_update_cr4(env, lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4)));
+    cpu_x86_update_cr3(env, lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3)));
     if (int_ctl & V_INTR_MASKING_MASK)
-        env->cr[8] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr8));
+        env->cr[8] = lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr8));
     /* we need to set the efer after the crs so the hidden flags get set properly */
 #ifdef TARGET_X86_64
-    env->efer  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer));
+    env->efer  = lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer));
     env->hflags &= ~HF_LMA_MASK;
     if (env->efer & MSR_EFER_LMA)
        env->hflags |= HF_LMA_MASK;
 #endif
 
     env->eflags = 0;
-    load_eflags(ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags)),
+    load_eflags(lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags)),
                 ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
     CC_OP = CC_OP_EFLAGS;
 
@@ -4358,12 +4374,12 @@ void vmexit(uint64_t exit_code, uint64_t
     SVM_LOAD_SEG(env->vm_hsave, SS, ss);
     SVM_LOAD_SEG(env->vm_hsave, DS, ds);
 
-    EIP = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip));
-    ESP = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rsp));
-    EAX = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rax));
+    EIP = lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip));
+    ESP = lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.rsp));
+    EAX = lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.rax));
 
-    env->dr[6] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr6));
-    env->dr[7] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr7));
+    env->dr[6] = lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr6));
+    env->dr[7] = lduq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr7));
 
     /* other setups */
     cpu_x86_set_cpl(env, 0);
Index: target-i386/helper2.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/helper2.c,v
retrieving revision 1.53
diff -u -d -d -p -r1.53 helper2.c
--- target-i386/helper2.c	14 Oct 2007 07:07:06 -0000	1.53
+++ target-i386/helper2.c	16 Oct 2007 11:39:06 -0000
@@ -641,7 +641,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
 
             pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
                 env->a20_mask;
-            pml4e = ldq_phys(pml4e_addr);
+            pml4e = lduq_phys(pml4e_addr);
             if (!(pml4e & PG_PRESENT_MASK)) {
                 error_code = 0;
                 goto do_fault;
@@ -657,7 +657,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
             ptep = pml4e ^ PG_NX_MASK;
             pdpe_addr = ((pml4e & PHYS_ADDR_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
                 env->a20_mask;
-            pdpe = ldq_phys(pdpe_addr);
+            pdpe = lduq_phys(pdpe_addr);
             if (!(pdpe & PG_PRESENT_MASK)) {
                 error_code = 0;
                 goto do_fault;
@@ -677,7 +677,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
             /* XXX: load them when cr3 is loaded ? */
             pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) &
                 env->a20_mask;
-            pdpe = ldq_phys(pdpe_addr);
+            pdpe = lduq_phys(pdpe_addr);
             if (!(pdpe & PG_PRESENT_MASK)) {
                 error_code = 0;
                 goto do_fault;
@@ -687,7 +687,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
 
         pde_addr = ((pdpe & PHYS_ADDR_MASK) + (((addr >> 21) & 0x1ff) << 3)) &
             env->a20_mask;
-        pde = ldq_phys(pde_addr);
+        pde = lduq_phys(pde_addr);
         if (!(pde & PG_PRESENT_MASK)) {
             error_code = 0;
             goto do_fault;
@@ -731,7 +731,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
             }
             pte_addr = ((pde & PHYS_ADDR_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
                 env->a20_mask;
-            pte = ldq_phys(pte_addr);
+            pte = lduq_phys(pte_addr);
             if (!(pte & PG_PRESENT_MASK)) {
                 error_code = 0;
                 goto do_fault;
@@ -772,7 +772,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
         /* page directory entry */
         pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) &
             env->a20_mask;
-        pde = ldl_phys(pde_addr);
+        pde = ldul_phys(pde_addr);
         if (!(pde & PG_PRESENT_MASK)) {
             error_code = 0;
             goto do_fault;
@@ -810,7 +810,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
             /* page directory entry */
             pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
                 env->a20_mask;
-            pte = ldl_phys(pte_addr);
+            pte = ldul_phys(pte_addr);
             if (!(pte & PG_PRESENT_MASK)) {
                 error_code = 0;
                 goto do_fault;
@@ -910,13 +910,13 @@ target_phys_addr_t cpu_get_phys_page_deb
 
             pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
                 env->a20_mask;
-            pml4e = ldl_phys(pml4e_addr);
+            pml4e = ldul_phys(pml4e_addr);
             if (!(pml4e & PG_PRESENT_MASK))
                 return -1;
 
             pdpe_addr = ((pml4e & ~0xfff) + (((addr >> 30) & 0x1ff) << 3)) &
                 env->a20_mask;
-            pdpe = ldl_phys(pdpe_addr);
+            pdpe = ldul_phys(pdpe_addr);
             if (!(pdpe & PG_PRESENT_MASK))
                 return -1;
         } else
@@ -924,14 +924,14 @@ target_phys_addr_t cpu_get_phys_page_deb
         {
             pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) &
                 env->a20_mask;
-            pdpe = ldl_phys(pdpe_addr);
+            pdpe = ldul_phys(pdpe_addr);
             if (!(pdpe & PG_PRESENT_MASK))
                 return -1;
         }
 
         pde_addr = ((pdpe & ~0xfff) + (((addr >> 21) & 0x1ff) << 3)) &
             env->a20_mask;
-        pde = ldl_phys(pde_addr);
+        pde = ldul_phys(pde_addr);
         if (!(pde & PG_PRESENT_MASK)) {
             return -1;
         }
@@ -944,7 +944,7 @@ target_phys_addr_t cpu_get_phys_page_deb
             pte_addr = ((pde & ~0xfff) + (((addr >> 12) & 0x1ff) << 3)) &
                 env->a20_mask;
             page_size = 4096;
-            pte = ldl_phys(pte_addr);
+            pte = ldul_phys(pte_addr);
         }
     } else {
         if (!(env->cr[0] & CR0_PG_MASK)) {
@@ -953,7 +953,7 @@ target_phys_addr_t cpu_get_phys_page_deb
         } else {
             /* page directory entry */
             pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask;
-            pde = ldl_phys(pde_addr);
+            pde = ldul_phys(pde_addr);
             if (!(pde & PG_PRESENT_MASK))
                 return -1;
             if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
@@ -962,7 +962,7 @@ target_phys_addr_t cpu_get_phys_page_deb
             } else {
                 /* page directory entry */
                 pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask;
-                pte = ldl_phys(pte_addr);
+                pte = ldul_phys(pte_addr);
                 if (!(pte & PG_PRESENT_MASK))
                     return -1;
                 page_size = 4096;
Index: target-i386/op.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/op.c,v
retrieving revision 1.51
diff -u -d -d -p -r1.51 op.c
--- target-i386/op.c	23 Sep 2007 15:28:04 -0000	1.51
+++ target-i386/op.c	16 Oct 2007 11:39:06 -0000
@@ -716,8 +716,8 @@ void OPPROTO op_boundw(void)
 void OPPROTO op_boundl(void)
 {
     int low, high, v;
-    low = ldl(A0);
-    high = ldl(A0 + 4);
+    low = ldul(A0);
+    high = ldul(A0 + 4);
     v = T0;
     if (v < low || v > high) {
         raise_exception(EXCP05_BOUND);
@@ -747,8 +747,6 @@ void OPPROTO op_exit_tb(void)
 
 /* multiple size ops */
 
-#define ldul ldl
-
 #define SHIFT 0
 #include "ops_template.h"
 #undef SHIFT
@@ -1688,7 +1686,7 @@ CCTable cc_table[CC_OP_NB] = {
 void OPPROTO op_flds_FT0_A0(void)
 {
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = ldl(A0);
+    FP_CONVERT.i32 = ldul(A0);
     FT0 = FP_CONVERT.f;
 #else
     FT0 = ldfl(A0);
@@ -1698,7 +1696,7 @@ void OPPROTO op_flds_FT0_A0(void)
 void OPPROTO op_fldl_FT0_A0(void)
 {
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i64 = ldq(A0);
+    FP_CONVERT.i64 = lduq(A0);
     FT0 = FP_CONVERT.d;
 #else
     FT0 = ldfq(A0);
@@ -1715,12 +1713,12 @@ void helper_fild_FT0_A0(void)
 
 void helper_fildl_FT0_A0(void)
 {
-    FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
+    FT0 = (CPU86_LDouble)((int32_t)ldul(A0));
 }
 
 void helper_fildll_FT0_A0(void)
 {
-    FT0 = (CPU86_LDouble)((int64_t)ldq(A0));
+    FT0 = (CPU86_LDouble)((int64_t)lduq(A0));
 }
 
 void OPPROTO op_fild_FT0_A0(void)
@@ -1753,20 +1751,20 @@ void OPPROTO op_fild_FT0_A0(void)
 void OPPROTO op_fildl_FT0_A0(void)
 {
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = (int32_t) ldl(A0);
+    FP_CONVERT.i32 = (int32_t) ldul(A0);
     FT0 = (CPU86_LDouble)FP_CONVERT.i32;
 #else
-    FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
+    FT0 = (CPU86_LDouble)((int32_t)ldul(A0));
 #endif
 }
 
 void OPPROTO op_fildll_FT0_A0(void)
 {
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i64 = (int64_t) ldq(A0);
+    FP_CONVERT.i64 = (int64_t) lduq(A0);
     FT0 = (CPU86_LDouble)FP_CONVERT.i64;
 #else
-    FT0 = (CPU86_LDouble)((int64_t)ldq(A0));
+    FT0 = (CPU86_LDouble)((int64_t)lduq(A0));
 #endif
 }
 #endif
@@ -1778,7 +1776,7 @@ void OPPROTO op_flds_ST0_A0(void)
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = ldl(A0);
+    FP_CONVERT.i32 = ldul(A0);
     env->fpregs[new_fpstt].d = FP_CONVERT.f;
 #else
     env->fpregs[new_fpstt].d = ldfl(A0);
@@ -1792,7 +1790,7 @@ void OPPROTO op_fldl_ST0_A0(void)
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i64 = ldq(A0);
+    FP_CONVERT.i64 = lduq(A0);
     env->fpregs[new_fpstt].d = FP_CONVERT.d;
 #else
     env->fpregs[new_fpstt].d = ldfq(A0);
@@ -1822,7 +1820,7 @@ void helper_fildl_ST0_A0(void)
 {
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
-    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0));
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldul(A0));
     env->fpstt = new_fpstt;
     env->fptags[new_fpstt] = 0; /* validate stack entry */
 }
@@ -1831,7 +1829,7 @@ void helper_fildll_ST0_A0(void)
 {
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
-    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int64_t)ldq(A0));
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int64_t)lduq(A0));
     env->fpstt = new_fpstt;
     env->fptags[new_fpstt] = 0; /* validate stack entry */
 }
@@ -1872,10 +1870,10 @@ void OPPROTO op_fildl_ST0_A0(void)
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = (int32_t) ldl(A0);
+    FP_CONVERT.i32 = (int32_t) ldul(A0);
     env->fpregs[new_fpstt].d = (CPU86_LDouble)FP_CONVERT.i32;
 #else
-    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0));
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldul(A0));
 #endif
     env->fpstt = new_fpstt;
     env->fptags[new_fpstt] = 0; /* validate stack entry */
@@ -1886,10 +1884,10 @@ void OPPROTO op_fildll_ST0_A0(void)
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i64 = (int64_t) ldq(A0);
+    FP_CONVERT.i64 = (int64_t) lduq(A0);
     env->fpregs[new_fpstt].d = (CPU86_LDouble)FP_CONVERT.i64;
 #else
-    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int64_t)ldq(A0));
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int64_t)lduq(A0));
 #endif
     env->fpstt = new_fpstt;
     env->fptags[new_fpstt] = 0; /* validate stack entry */
Index: target-i386/ops_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-i386/ops_mem.h,v
retrieving revision 1.7
diff -u -d -d -p -r1.7 ops_mem.h
--- target-i386/ops_mem.h	28 Nov 2005 21:02:17 -0000	1.7
+++ target-i386/ops_mem.h	16 Oct 2007 11:39:06 -0000
@@ -20,7 +20,7 @@ void OPPROTO glue(glue(op_ldsw, MEMSUFFI
 
 void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T0_A0)(void)
 {
-    T0 = (uint32_t)glue(ldl, MEMSUFFIX)(A0);
+    T0 = (uint32_t)glue(ldul, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_ldub, MEMSUFFIX), _T1_A0)(void)
@@ -45,7 +45,7 @@ void OPPROTO glue(glue(op_ldsw, MEMSUFFI
 
 void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T1_A0)(void)
 {
-    T1 = (uint32_t)glue(ldl, MEMSUFFIX)(A0);
+    T1 = (uint32_t)glue(ldul, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_stb, MEMSUFFIX), _T0_A0)(void)
@@ -91,7 +91,7 @@ void OPPROTO glue(glue(op_ldq, MEMSUFFIX
 {
     uint64_t *p;
     p = (uint64_t *)((char *)env + PARAM1);
-    *p = glue(ldq, MEMSUFFIX)(A0);
+    *p = glue(lduq, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_stq, MEMSUFFIX), _env_A0)(void)
@@ -106,8 +106,8 @@ void OPPROTO glue(glue(op_ldo, MEMSUFFIX
 {
     XMMReg *p;
     p = (XMMReg *)((char *)env + PARAM1);
-    p->XMM_Q(0) = glue(ldq, MEMSUFFIX)(A0);
-    p->XMM_Q(1) = glue(ldq, MEMSUFFIX)(A0 + 8);
+    p->XMM_Q(0) = glue(lduq, MEMSUFFIX)(A0);
+    p->XMM_Q(1) = glue(lduq, MEMSUFFIX)(A0 + 8);
 }
 
 void OPPROTO glue(glue(op_sto, MEMSUFFIX), _env_A0)(void)
@@ -122,22 +122,22 @@ void OPPROTO glue(glue(op_sto, MEMSUFFIX
 #ifdef TARGET_X86_64
 void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T0_A0)(void)
 {
-    T0 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
+    T0 = glue(ldsl, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T1_A0)(void)
 {
-    T1 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
+    T1 = glue(ldsl, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_ldq, MEMSUFFIX), _T0_A0)(void)
 {
-    T0 = glue(ldq, MEMSUFFIX)(A0);
+    T0 = glue(lduq, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_ldq, MEMSUFFIX), _T1_A0)(void)
 {
-    T1 = glue(ldq, MEMSUFFIX)(A0);
+    T1 = glue(lduq, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_stq, MEMSUFFIX), _T0_A0)(void)
Index: target-i386/svm.h
===================================================================
RCS file: /sources/qemu/qemu/target-i386/svm.h,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 svm.h
--- target-i386/svm.h	23 Sep 2007 15:30:28 -0000	1.1
+++ target-i386/svm.h	16 Oct 2007 11:39:06 -0000
@@ -339,14 +339,14 @@ static inline int svm_check_intercept(un
     cpu_x86_load_seg_cache(env, \
                     R_##seg_index, \
                     lduw_phys(addr + offsetof(struct vmcb, save.seg.selector)),\
-                    ldq_phys(addr + offsetof(struct vmcb, save.seg.base)),\
-                    ldl_phys(addr + offsetof(struct vmcb, save.seg.limit)),\
-                    vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg.attrib)), ldq_phys(addr + offsetof(struct vmcb, save.seg.base)), ldl_phys(addr + offsetof(struct vmcb, save.seg.limit))))
+                    lduq_phys(addr + offsetof(struct vmcb, save.seg.base)),\
+                    ldul_phys(addr + offsetof(struct vmcb, save.seg.limit)),\
+                    vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg.attrib)), lduq_phys(addr + offsetof(struct vmcb, save.seg.base)), ldul_phys(addr + offsetof(struct vmcb, save.seg.limit))))
 
 #define SVM_LOAD_SEG2(addr, seg_qemu, seg_vmcb) \
     env->seg_qemu.selector  = lduw_phys(addr + offsetof(struct vmcb, save.seg_vmcb.selector)); \
-    env->seg_qemu.base      = ldq_phys(addr + offsetof(struct vmcb, save.seg_vmcb.base)); \
-    env->seg_qemu.limit     = ldl_phys(addr + offsetof(struct vmcb, save.seg_vmcb.limit)); \
+    env->seg_qemu.base      = lduq_phys(addr + offsetof(struct vmcb, save.seg_vmcb.base)); \
+    env->seg_qemu.limit     = ldul_phys(addr + offsetof(struct vmcb, save.seg_vmcb.limit)); \
     env->seg_qemu.flags     = vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg_vmcb.attrib)), env->seg_qemu.base, env->seg_qemu.limit)
 
 #define SVM_SAVE_SEG(addr, seg_qemu, seg_vmcb) \
Index: target-i386/translate-copy.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/translate-copy.c,v
retrieving revision 1.9
diff -u -d -d -p -r1.9 translate-copy.c
--- target-i386/translate-copy.c	17 Sep 2007 08:09:52 -0000	1.9
+++ target-i386/translate-copy.c	16 Oct 2007 11:39:06 -0000
@@ -207,7 +207,7 @@ static inline void gen_lea_modrm(DisasCo
         case 0:
             if (base == 5) {
                 base = -1;
-                disp = ldl_code(s->pc);
+                disp = ldul_code(s->pc);
                 s->pc += 4;
             } else {
                 disp = 0;
@@ -218,7 +218,7 @@ static inline void gen_lea_modrm(DisasCo
             break;
         default:
         case 2:
-            disp = ldl_code(s->pc);
+            disp = ldul_code(s->pc);
             s->pc += 4;
             break;
         }
@@ -266,7 +266,7 @@ static inline uint32_t insn_get(DisasCon
         break;
     default:
     case OT_LONG:
-        ret = ldl_code(s->pc);
+        ret = ldul_code(s->pc);
         s->pc += 4;
         break;
     }
Index: target-i386/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/translate.c,v
retrieving revision 1.72
diff -u -d -d -p -r1.72 translate.c
--- target-i386/translate.c	27 Sep 2007 01:52:00 -0000	1.72
+++ target-i386/translate.c	16 Oct 2007 11:39:07 -0000
@@ -1462,7 +1462,7 @@ static void gen_lea_modrm(DisasContext *
         case 0:
             if ((base & 7) == 5) {
                 base = -1;
-                disp = (int32_t)ldl_code(s->pc);
+                disp = (int32_t)ldul_code(s->pc);
                 s->pc += 4;
                 if (CODE64(s) && !havesib) {
                     disp += s->pc + s->rip_offset;
@@ -1476,7 +1476,7 @@ static void gen_lea_modrm(DisasContext *
             break;
         default:
         case 2:
-            disp = ldl_code(s->pc);
+            disp = ldul_code(s->pc);
             s->pc += 4;
             break;
         }
@@ -1736,7 +1736,7 @@ static inline uint32_t insn_get(DisasCon
         break;
     default:
     case OT_LONG:
-        ret = ldl_code(s->pc);
+        ret = ldul_code(s->pc);
         s->pc += 4;
         break;
     }
@@ -4190,7 +4190,7 @@ static target_ulong disas_insn(DisasCont
                 ot = dflag + OT_WORD;
 #ifdef TARGET_X86_64
             if (s->aflag == 2) {
-                offset_addr = ldq_code(s->pc);
+                offset_addr = lduq_code(s->pc);
                 s->pc += 8;
                 if (offset_addr == (int32_t)offset_addr)
                     gen_op_movq_A0_im(offset_addr);
@@ -4243,7 +4243,7 @@ static target_ulong disas_insn(DisasCont
         if (dflag == 2) {
             uint64_t tmp;
             /* 64 bit case */
-            tmp = ldq_code(s->pc);
+            tmp = lduq_code(s->pc);
             s->pc += 8;
             reg = (b & 7) | REX_B(s);
             gen_movtl_T0_im(tmp);
Index: target-m68k/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-m68k/exec.h,v
retrieving revision 1.5
diff -u -d -d -p -r1.5 exec.h
--- target-m68k/exec.h	14 Oct 2007 07:07:06 -0000	1.5
+++ target-m68k/exec.h	16 Oct 2007 11:39:07 -0000
@@ -42,6 +42,9 @@ int cpu_m68k_handle_mmu_fault (CPUState 
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 void cpu_m68k_flush_flags(CPUM68KState *env, int cc_op);
Index: target-m68k/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-m68k/op_helper.c,v
retrieving revision 1.7
diff -u -d -d -p -r1.7 op_helper.c
--- target-m68k/op_helper.c	14 Oct 2007 07:07:06 -0000	1.7
+++ target-m68k/op_helper.c	16 Oct 2007 11:39:07 -0000
@@ -33,6 +33,21 @@ extern int semihosting_enabled;
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -44,6 +59,7 @@ extern int semihosting_enabled;
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
@@ -83,8 +99,8 @@ static void do_rte(void)
     uint32_t fmt;
 
     sp = env->aregs[7];
-    fmt = ldl_kernel(sp);
-    env->pc = ldl_kernel(sp + 4);
+    fmt = ldul_kernel(sp);
+    env->pc = ldul_kernel(sp + 4);
     sp |= (fmt >> 28) & 3;
     env->sr = fmt & 0xffff;
     m68k_switch_sp(env);
@@ -112,7 +128,7 @@ void do_interrupt(int is_hw)
                     && (env->sr & SR_S) != 0
                     && (env->pc & 3) == 0
                     && lduw_code(env->pc - 4) == 0x4e71
-                    && ldl_code(env->pc) == 0x4e7bf000) {
+                    && ldul_code(env->pc) == 0x4e7bf000) {
                 env->pc += 4;
                 do_m68k_semihosting(env, env->dregs[0]);
                 return;
@@ -153,7 +169,7 @@ void do_interrupt(int is_hw)
     stl_kernel(sp, fmt);
     env->aregs[7] = sp;
     /* Jump to vector.  */
-    env->pc = ldl_kernel(env->vbr + vector);
+    env->pc = ldul_kernel(env->vbr + vector);
 }
 
 #endif
Index: target-m68k/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-m68k/op_mem.h,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 op_mem.h
--- target-m68k/op_mem.h	23 May 2007 19:58:11 -0000	1.1
+++ target-m68k/op_mem.h	16 Oct 2007 11:39:07 -0000
@@ -11,7 +11,7 @@ MEM_LD_OP(8u32,ub)
 MEM_LD_OP(8s32,sb)
 MEM_LD_OP(16u32,uw)
 MEM_LD_OP(16s32,sw)
-MEM_LD_OP(32,l)
+MEM_LD_OP(32,ul)
 
 #undef MEM_LD_OP
 
Index: target-mips/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-mips/exec.h,v
retrieving revision 1.39
diff -u -d -d -p -r1.39 exec.h
--- target-mips/exec.h	14 Oct 2007 07:07:07 -0000	1.39
+++ target-mips/exec.h	16 Oct 2007 11:39:07 -0000
@@ -54,6 +54,9 @@ register target_ulong T2 asm(AREG3);
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 #if defined(TARGET_MIPSN32) || defined(TARGET_MIPS64)
Index: target-mips/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-mips/op_helper.c,v
retrieving revision 1.66
diff -u -d -d -p -r1.66 op_helper.c
--- target-mips/op_helper.c	14 Oct 2007 07:07:07 -0000	1.66
+++ target-mips/op_helper.c	16 Oct 2007 11:39:07 -0000
@@ -544,6 +544,21 @@ static void do_unaligned_access (target_
 #define MMUSUFFIX _mmu
 #define ALIGNED_ONLY
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -555,6 +570,7 @@ static void do_unaligned_access (target_
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 static void do_unaligned_access (target_ulong addr, int is_write, int is_user, void *retaddr)
 {
Index: target-mips/op_mem.c
===================================================================
RCS file: /sources/qemu/qemu/target-mips/op_mem.c,v
retrieving revision 1.14
diff -u -d -d -p -r1.14 op_mem.c
--- target-mips/op_mem.c	9 Oct 2007 03:12:08 -0000	1.14
+++ target-mips/op_mem.c	16 Oct 2007 11:39:07 -0000
@@ -57,13 +57,13 @@ void glue(op_sh, MEMSUFFIX) (void)
 
 void glue(op_lw, MEMSUFFIX) (void)
 {
-    T0 = glue(ldl, MEMSUFFIX)(T0);
+    T0 = glue(ldul, MEMSUFFIX)(T0);
     RETURN();
 }
 
 void glue(op_lwu, MEMSUFFIX) (void)
 {
-    T0 = (uint32_t)glue(ldl, MEMSUFFIX)(T0);
+    T0 = (uint32_t)glue(ldul, MEMSUFFIX)(T0);
     RETURN();
 }
 
@@ -167,7 +167,7 @@ void glue(op_swr, MEMSUFFIX) (void)
 void glue(op_ll, MEMSUFFIX) (void)
 {
     T1 = T0;
-    T0 = glue(ldl, MEMSUFFIX)(T0);
+    T0 = glue(ldul, MEMSUFFIX)(T0);
     env->CP0_LLAddr = T1;
     RETURN();
 }
@@ -191,7 +191,7 @@ void glue(op_sc, MEMSUFFIX) (void)
 #if defined(TARGET_MIPSN32) || defined(TARGET_MIPS64)
 void glue(op_ld, MEMSUFFIX) (void)
 {
-    T0 = glue(ldq, MEMSUFFIX)(T0);
+    T0 = glue(lduq, MEMSUFFIX)(T0);
     RETURN();
 }
 
@@ -359,7 +359,7 @@ void glue(op_sdr, MEMSUFFIX) (void)
 void glue(op_lld, MEMSUFFIX) (void)
 {
     T1 = T0;
-    T0 = glue(ldq, MEMSUFFIX)(T0);
+    T0 = glue(lduq, MEMSUFFIX)(T0);
     env->CP0_LLAddr = T1;
     RETURN();
 }
@@ -383,7 +383,7 @@ void glue(op_scd, MEMSUFFIX) (void)
 
 void glue(op_lwc1, MEMSUFFIX) (void)
 {
-    WT0 = glue(ldl, MEMSUFFIX)(T0);
+    WT0 = glue(ldul, MEMSUFFIX)(T0);
     RETURN();
 }
 void glue(op_swc1, MEMSUFFIX) (void)
@@ -393,7 +393,7 @@ void glue(op_swc1, MEMSUFFIX) (void)
 }
 void glue(op_ldc1, MEMSUFFIX) (void)
 {
-    DT0 = glue(ldq, MEMSUFFIX)(T0);
+    DT0 = glue(lduq, MEMSUFFIX)(T0);
     RETURN();
 }
 void glue(op_sdc1, MEMSUFFIX) (void)
@@ -403,7 +403,7 @@ void glue(op_sdc1, MEMSUFFIX) (void)
 }
 void glue(op_luxc1, MEMSUFFIX) (void)
 {
-    DT0 = glue(ldq, MEMSUFFIX)(T0 & ~0x7);
+    DT0 = glue(lduq, MEMSUFFIX)(T0 & ~0x7);
     RETURN();
 }
 void glue(op_suxc1, MEMSUFFIX) (void)
Index: target-mips/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-mips/translate.c,v
retrieving revision 1.106
diff -u -d -d -p -r1.106 translate.c
--- target-mips/translate.c	9 Oct 2007 03:39:58 -0000	1.106
+++ target-mips/translate.c	16 Oct 2007 11:39:07 -0000
@@ -6544,7 +6544,7 @@ gen_intermediate_code_internal (CPUState
             gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK;
             gen_opc_instr_start[lj] = 1;
         }
-        ctx.opcode = ldl_code(ctx.pc);
+        ctx.opcode = ldul_code(ctx.pc);
         decode_opc(env, &ctx);
         ctx.pc += 4;
 
Index: target-ppc/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/exec.h,v
retrieving revision 1.29
diff -u -d -d -p -r1.29 exec.h
--- target-ppc/exec.h	14 Oct 2007 07:07:07 -0000	1.29
+++ target-ppc/exec.h	16 Oct 2007 11:39:07 -0000
@@ -91,7 +91,12 @@ static always_inline target_ulong rotl64
 #endif
 
 #if !defined(CONFIG_USER_ONLY)
+
+#include "softmmu_exec.h"
+#define REVERSE_ENDIAN
 #include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
+
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 void do_raise_exception_err (uint32_t exception, int error_code);
Index: target-ppc/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/helper.c,v
retrieving revision 1.80
diff -u -d -d -p -r1.80 helper.c
--- target-ppc/helper.c	14 Oct 2007 10:21:20 -0000	1.80
+++ target-ppc/helper.c	16 Oct 2007 11:39:07 -0000
@@ -554,8 +554,8 @@ static always_inline int _find_pte (mmu_
     for (i = 0; i < 8; i++) {
 #if defined(TARGET_PPC64)
         if (is_64b) {
-            pte0 = ldq_phys(base + (i * 16));
-            pte1 =  ldq_phys(base + (i * 16) + 8);
+            pte0 = lduq_phys(base + (i * 16));
+            pte1 =  lduq_phys(base + (i * 16) + 8);
             r = pte64_check(ctx, pte0, pte1, h, rw, type);
 #if defined (DEBUG_MMU)
             if (loglevel != 0) {
@@ -569,8 +569,8 @@ static always_inline int _find_pte (mmu_
         } else
 #endif
         {
-            pte0 = ldl_phys(base + (i * 8));
-            pte1 =  ldl_phys(base + (i * 8) + 4);
+            pte0 = ldul_phys(base + (i * 8));
+            pte1 =  ldul_phys(base + (i * 8) + 4);
             r = pte32_check(ctx, pte0, pte1, h, rw, type);
 #if defined (DEBUG_MMU)
             if (loglevel != 0) {
@@ -685,8 +685,8 @@ static int slb_lookup (CPUPPCState *env,
 #endif
     mask = 0x0000000000000000ULL; /* Avoid gcc warning */
     for (n = 0; n < env->slb_nr; n++) {
-        tmp64 = ldq_phys(sr_base);
-        tmp = ldl_phys(sr_base + 8);
+        tmp64 = lduq_phys(sr_base);
+        tmp = ldul_phys(sr_base + 8);
 #if defined(DEBUG_SLB)
         if (loglevel != 0) {
             fprintf(logfile, "%s: seg %d " PADDRX " %016" PRIx64 " %08"
@@ -733,7 +733,7 @@ void ppc_slb_invalidate_all (CPUPPCState
     do_invalidate = 0;
     sr_base = env->spr[SPR_ASR];
     for (n = 0; n < env->slb_nr; n++) {
-        tmp64 = ldq_phys(sr_base);
+        tmp64 = lduq_phys(sr_base);
         if (slb_is_valid(tmp64)) {
             slb_invalidate(&tmp64);
             stq_phys(sr_base, tmp64);
@@ -761,7 +761,7 @@ void ppc_slb_invalidate_one (CPUPPCState
     if (n >= 0) {
         sr_base = env->spr[SPR_ASR];
         sr_base += 12 * n;
-        tmp64 = ldq_phys(sr_base);
+        tmp64 = lduq_phys(sr_base);
         if (slb_is_valid(tmp64)) {
             slb_invalidate(&tmp64);
             stq_phys(sr_base, tmp64);
@@ -783,8 +783,8 @@ target_ulong ppc_load_slb (CPUPPCState *
 
     sr_base = env->spr[SPR_ASR];
     sr_base += 12 * slb_nr;
-    tmp64 = ldq_phys(sr_base);
-    tmp = ldl_phys(sr_base + 8);
+    tmp64 = lduq_phys(sr_base);
+    tmp = ldul_phys(sr_base + 8);
     if (tmp64 & 0x0000000008000000ULL) {
         /* SLB entry is valid */
         /* Copy SLB bits 62:88 to Rt 37:63 (VSID 23:49) */
@@ -990,10 +990,10 @@ static int get_segment (CPUState *env, m
                         sdr, mask + 0x80);
                 for (curaddr = sdr; curaddr < (sdr + mask + 0x80);
                      curaddr += 16) {
-                    a0 = ldl_phys(curaddr);
-                    a1 = ldl_phys(curaddr + 4);
-                    a2 = ldl_phys(curaddr + 8);
-                    a3 = ldl_phys(curaddr + 12);
+                    a0 = ldul_phys(curaddr);
+                    a1 = ldul_phys(curaddr + 4);
+                    a2 = ldul_phys(curaddr + 8);
+                    a3 = ldul_phys(curaddr + 12);
                     if (a0 != 0 || a1 != 0 || a2 != 0 || a3 != 0) {
                         fprintf(logfile,
                                 PADDRX ": %08x %08x %08x %08x\n",
@@ -2266,7 +2266,7 @@ static always_inline void powerpc_excp (
 #endif
         /* XXX: this is false */
         /* Get rS/rD and rA from faulting opcode */
-        env->spr[SPR_DSISR] |= (ldl_code((env->nip - 4)) & 0x03FF0000) >> 16;
+        env->spr[SPR_DSISR] |= (ldul_code((env->nip - 4)) & 0x03FF0000) >> 16;
         goto store_current;
     case POWERPC_EXCP_PROGRAM:   /* Program exception                        */
         switch (env->error_code & ~0xF) {
Index: target-ppc/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_helper.c,v
retrieving revision 1.51
diff -u -d -d -p -r1.51 op_helper.c
--- target-ppc/op_helper.c	14 Oct 2007 08:27:14 -0000	1.51
+++ target-ppc/op_helper.c	16 Oct 2007 11:39:08 -0000
@@ -2296,6 +2296,21 @@ DO_SPE_OP1(fsctuf);
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -2307,6 +2322,7 @@ DO_SPE_OP1(fsctuf);
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-ppc/op_helper.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_helper.h,v
retrieving revision 1.21
diff -u -d -d -p -r1.21 op_helper.h
--- target-ppc/op_helper.h	7 Oct 2007 17:13:44 -0000	1.21
+++ target-ppc/op_helper.h	16 Oct 2007 11:39:08 -0000
@@ -37,19 +37,6 @@ void glue(do_POWER2_lfq_le, MEMSUFFIX) (
 void glue(do_POWER2_stfq, MEMSUFFIX) (void);
 void glue(do_POWER2_stfq_le, MEMSUFFIX) (void);
 
-#if defined(TARGET_PPC64)
-void glue(do_lsw_64, MEMSUFFIX) (int dst);
-void glue(do_lsw_le_64, MEMSUFFIX) (int dst);
-void glue(do_stsw_64, MEMSUFFIX) (int src);
-void glue(do_stsw_le_64, MEMSUFFIX) (int src);
-void glue(do_lmw_64, MEMSUFFIX) (int dst);
-void glue(do_lmw_le_64, MEMSUFFIX) (int dst);
-void glue(do_stmw_64, MEMSUFFIX) (int src);
-void glue(do_stmw_le_64, MEMSUFFIX) (int src);
-void glue(do_icbi_64, MEMSUFFIX) (void);
-void glue(do_dcbz_64, MEMSUFFIX) (void);
-#endif
-
 #else
 
 void do_print_mem_EA (target_ulong EA);
Index: target-ppc/op_helper_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_helper_mem.h,v
retrieving revision 1.14
diff -u -d -d -p -r1.14 op_helper_mem.h
--- target-ppc/op_helper_mem.h	7 Oct 2007 17:13:44 -0000	1.14
+++ target-ppc/op_helper_mem.h	16 Oct 2007 11:39:08 -0000
@@ -19,85 +19,33 @@
  */
 
 /* Multiple word / string load and store */
-static always_inline target_ulong glue(ld32r, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF000000UL) >> 24) | ((tmp & 0x00FF0000UL) >> 8) |
-        ((tmp & 0x0000FF00UL) << 8) | ((tmp & 0x000000FFUL) << 24);
-}
-
-static always_inline void glue(st32r, MEMSUFFIX) (target_ulong EA,
-                                                  target_ulong data)
-{
-    uint32_t tmp =
-        ((data & 0xFF000000UL) >> 24) | ((data & 0x00FF0000UL) >> 8) |
-        ((data & 0x0000FF00UL) << 8) | ((data & 0x000000FFUL) << 24);
-    glue(stl, MEMSUFFIX)(EA, tmp);
-}
-
 void glue(do_lmw, MEMSUFFIX) (int dst)
 {
     for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lmw_64, MEMSUFFIX) (int dst)
-{
-    for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst] = glue(ldul, MEMSUFFIX)(T0);
     }
 }
-#endif
 
 void glue(do_stmw, MEMSUFFIX) (int src)
 {
     for (; src < 32; src++, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint32_t)T0, env->gpr[src]);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stmw_64, MEMSUFFIX) (int src)
-{
-    for (; src < 32; src++, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint64_t)T0, env->gpr[src]);
+        glue(stl, MEMSUFFIX)(T0, env->gpr[src]);
     }
 }
-#endif
 
 void glue(do_lmw_le, MEMSUFFIX) (int dst)
 {
     for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lmw_le_64, MEMSUFFIX) (int dst)
-{
-    for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst] = glue(ldulr, MEMSUFFIX)(T0);
     }
 }
-#endif
 
 void glue(do_stmw_le, MEMSUFFIX) (int src)
 {
     for (; src < 32; src++, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint32_t)T0, env->gpr[src]);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stmw_le_64, MEMSUFFIX) (int src)
-{
-    for (; src < 32; src++, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint64_t)T0, env->gpr[src]);
+        glue(stlr, MEMSUFFIX)(T0, env->gpr[src]);
     }
 }
-#endif
 
 void glue(do_lsw, MEMSUFFIX) (int dst)
 {
@@ -105,71 +53,33 @@ void glue(do_lsw, MEMSUFFIX) (int dst)
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-        if (unlikely(dst == 32))
-            dst = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        tmp = 0;
-        for (sh = 24; T1 > 0; T1--, T0++, sh -= 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint32_t)T0) << sh;
-        }
-        env->gpr[dst] = tmp;
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lsw_64, MEMSUFFIX) (int dst)
-{
-    uint32_t tmp;
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst++] = glue(ldul, MEMSUFFIX)(T0);
         if (unlikely(dst == 32))
             dst = 0;
     }
     if (unlikely(T1 != 0)) {
         tmp = 0;
         for (sh = 24; T1 > 0; T1--, T0++, sh -= 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint64_t)T0) << sh;
+            tmp |= glue(ldub, MEMSUFFIX)(T0) << sh;
         }
         env->gpr[dst] = tmp;
     }
 }
-#endif
 
 void glue(do_stsw, MEMSUFFIX) (int src)
 {
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint32_t)T0, env->gpr[src++]);
-        if (unlikely(src == 32))
-            src = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        for (sh = 24; T1 > 0; T1--, T0++, sh -= 8)
-            glue(stb, MEMSUFFIX)((uint32_t)T0, (env->gpr[src] >> sh) & 0xFF);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stsw_64, MEMSUFFIX) (int src)
-{
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint64_t)T0, env->gpr[src++]);
+        glue(stl, MEMSUFFIX)(T0, env->gpr[src++]);
         if (unlikely(src == 32))
             src = 0;
     }
     if (unlikely(T1 != 0)) {
         for (sh = 24; T1 > 0; T1--, T0++, sh -= 8)
-            glue(stb, MEMSUFFIX)((uint64_t)T0, (env->gpr[src] >> sh) & 0xFF);
+            glue(stb, MEMSUFFIX)(T0, (env->gpr[src] >> sh) & 0xFF);
     }
 }
-#endif
 
 void glue(do_lsw_le, MEMSUFFIX) (int dst)
 {
@@ -177,71 +87,33 @@ void glue(do_lsw_le, MEMSUFFIX) (int dst
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
-        if (unlikely(dst == 32))
-            dst = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        tmp = 0;
-        for (sh = 0; T1 > 0; T1--, T0++, sh += 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint32_t)T0) << sh;
-        }
-        env->gpr[dst] = tmp;
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lsw_le_64, MEMSUFFIX) (int dst)
-{
-    uint32_t tmp;
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst++] = glue(ldulr, MEMSUFFIX)(T0);
         if (unlikely(dst == 32))
             dst = 0;
     }
     if (unlikely(T1 != 0)) {
         tmp = 0;
         for (sh = 0; T1 > 0; T1--, T0++, sh += 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint64_t)T0) << sh;
+            tmp |= glue(ldub, MEMSUFFIX)(T0) << sh;
         }
         env->gpr[dst] = tmp;
     }
 }
-#endif
 
 void glue(do_stsw_le, MEMSUFFIX) (int src)
 {
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint32_t)T0, env->gpr[src++]);
-        if (unlikely(src == 32))
-            src = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        for (sh = 0; T1 > 0; T1--, T0++, sh += 8)
-            glue(stb, MEMSUFFIX)((uint32_t)T0, (env->gpr[src] >> sh) & 0xFF);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stsw_le_64, MEMSUFFIX) (int src)
-{
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint64_t)T0, env->gpr[src++]);
+        glue(stlr, MEMSUFFIX)(T0, env->gpr[src++]);
         if (unlikely(src == 32))
             src = 0;
     }
     if (unlikely(T1 != 0)) {
         for (sh = 0; T1 > 0; T1--, T0++, sh += 8)
-            glue(stb, MEMSUFFIX)((uint64_t)T0, (env->gpr[src] >> sh) & 0xFF);
+            glue(stb, MEMSUFFIX)(T0, (env->gpr[src] >> sh) & 0xFF);
     }
 }
-#endif
 
 /* Instruction cache invalidation helper */
 void glue(do_icbi, MEMSUFFIX) (void)
@@ -252,27 +124,11 @@ void glue(do_icbi, MEMSUFFIX) (void)
      * (not a fetch) by the MMU. To be sure it will be so,
      * do the load "by hand".
      */
-    tmp = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-    T0 &= ~(env->icache_line_size - 1);
-    tb_invalidate_page_range((uint32_t)T0,
-                             (uint32_t)(T0 + env->icache_line_size));
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_icbi_64, MEMSUFFIX) (void)
-{
-    uint64_t tmp;
-    /* Invalidate one cache line :
-     * PowerPC specification says this is to be treated like a load
-     * (not a fetch) by the MMU. To be sure it will be so,
-     * do the load "by hand".
-     */
-    tmp = glue(ldq, MEMSUFFIX)((uint64_t)T0);
+    tmp = glue(ldul, MEMSUFFIX)(T0);
     T0 &= ~(env->icache_line_size - 1);
-    tb_invalidate_page_range((uint64_t)T0,
-                             (uint64_t)(T0 + env->icache_line_size));
+    /* We assume the range does not wrap around 2^32 on 32-bit targets */
+    tb_invalidate_page_range(T0, T0 + env->icache_line_size);
 }
-#endif
 
 void glue(do_dcbz, MEMSUFFIX) (void)
 {
@@ -281,90 +137,43 @@ void glue(do_dcbz, MEMSUFFIX) (void)
     /* XXX: should be 970 specific (?) */
     if (((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1)
         dcache_line_size = 32;
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x00), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x04), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x08), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x0C), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x10), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x14), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x18), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x1C), 0);
-    if (dcache_line_size >= 64) {
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x20UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x24UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x28UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x2CUL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x30UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x34UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x38UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x3CUL), 0);
-        if (dcache_line_size >= 128) {
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x40UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x44UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x48UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x4CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x50UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x54UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x58UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x5CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x60UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x64UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x68UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x6CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x70UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x74UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x78UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x7CUL), 0);
-        }
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_dcbz_64, MEMSUFFIX) (void)
-{
-    int dcache_line_size = env->dcache_line_size;
-
-    /* XXX: should be 970 specific (?) */
-    if (((env->spr[SPR_970_HID5] >> 6) & 0x3) == 0x2)
-        dcache_line_size = 32;
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x00), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x04), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x08), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x0C), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x10), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x14), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x18), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x1C), 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x00, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x04, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x08, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x0C, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x10, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x14, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x18, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x1C, 0);
     if (dcache_line_size >= 64) {
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x20UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x24UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x28UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x2CUL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x30UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x34UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x38UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x3CUL), 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x20UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x24UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x28UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x2CUL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x30UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x34UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x38UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x3CUL, 0);
         if (dcache_line_size >= 128) {
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x40UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x44UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x48UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x4CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x50UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x54UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x58UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x5CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x60UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x64UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x68UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x6CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x70UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x74UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x78UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x7CUL), 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x40UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x44UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x48UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x4CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x50UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x54UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x58UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x5CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x60UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x64UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x68UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x6CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x70UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x74UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x78UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x7CUL, 0);
         }
     }
 }
-#endif
 
 /* PowerPC 601 specific instructions (POWER bridge) */
 // XXX: to be tested
@@ -400,26 +209,6 @@ void glue(do_POWER2_lfq, MEMSUFFIX) (voi
     FT1 = glue(ldfq, MEMSUFFIX)((uint32_t)(T0 + 4));
 }
 
-static always_inline double glue(ldfqr, MEMSUFFIX) (target_ulong EA)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = glue(ldfq, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-
-    return u.d;
-}
-
 void glue(do_POWER2_lfq_le, MEMSUFFIX) (void)
 {
     FT0 = glue(ldfqr, MEMSUFFIX)((uint32_t)(T0 + 4));
@@ -432,25 +221,6 @@ void glue(do_POWER2_stfq, MEMSUFFIX) (vo
     glue(stfq, MEMSUFFIX)((uint32_t)(T0 + 4), FT1);
 }
 
-static always_inline void glue(stfqr, MEMSUFFIX) (target_ulong EA, double d)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = d;
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-    glue(stfq, MEMSUFFIX)(EA, u.d);
-}
-
 void glue(do_POWER2_stfq_le, MEMSUFFIX) (void)
 {
     glue(stfqr, MEMSUFFIX)((uint32_t)(T0 + 4), FT0);
Index: target-ppc/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_mem.h,v
retrieving revision 1.22
diff -u -d -d -p -r1.22 op_mem.h
--- target-ppc/op_mem.h	7 Oct 2007 18:19:25 -0000	1.22
+++ target-ppc/op_mem.h	16 Oct 2007 11:39:08 -0000
@@ -18,85 +18,6 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-static always_inline uint16_t glue(ld16r, MEMSUFFIX) (target_ulong EA)
-{
-    uint16_t tmp = glue(lduw, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF00) >> 8) | ((tmp & 0x00FF) << 8);
-}
-
-static always_inline int32_t glue(ld16rs, MEMSUFFIX) (target_ulong EA)
-{
-    int16_t tmp = glue(lduw, MEMSUFFIX)(EA);
-    return (int16_t)((tmp & 0xFF00) >> 8) | ((tmp & 0x00FF) << 8);
-}
-
-static always_inline uint32_t glue(ld32r, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF000000) >> 24) | ((tmp & 0x00FF0000) >> 8) |
-        ((tmp & 0x0000FF00) << 8) | ((tmp & 0x000000FF) << 24);
-}
-
-#if defined(TARGET_PPC64) || defined(TARGET_PPCEMB)
-static always_inline uint64_t glue(ld64r, MEMSUFFIX) (target_ulong EA)
-{
-    uint64_t tmp = glue(ldq, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF00000000000000ULL) >> 56) |
-        ((tmp & 0x00FF000000000000ULL) >> 40) |
-        ((tmp & 0x0000FF0000000000ULL) >> 24) |
-        ((tmp & 0x000000FF00000000ULL) >> 8) |
-        ((tmp & 0x00000000FF000000ULL) << 8) |
-        ((tmp & 0x0000000000FF0000ULL) << 24) |
-        ((tmp & 0x000000000000FF00ULL) << 40) |
-        ((tmp & 0x00000000000000FFULL) << 54);
-}
-#endif
-
-#if defined(TARGET_PPC64)
-static always_inline int64_t glue(ldsl, MEMSUFFIX) (target_ulong EA)
-{
-    return (int32_t)glue(ldl, MEMSUFFIX)(EA);
-}
-
-static always_inline int64_t glue(ld32rs, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return (int32_t)((tmp & 0xFF000000) >> 24) | ((tmp & 0x00FF0000) >> 8) |
-        ((tmp & 0x0000FF00) << 8) | ((tmp & 0x000000FF) << 24);
-}
-#endif
-
-static always_inline void glue(st16r, MEMSUFFIX) (target_ulong EA,
-                                                  uint16_t data)
-{
-    uint16_t tmp = ((data & 0xFF00) >> 8) | ((data & 0x00FF) << 8);
-    glue(stw, MEMSUFFIX)(EA, tmp);
-}
-
-static always_inline void glue(st32r, MEMSUFFIX) (target_ulong EA,
-                                                  uint32_t data)
-{
-    uint32_t tmp = ((data & 0xFF000000) >> 24) | ((data & 0x00FF0000) >> 8) |
-        ((data & 0x0000FF00) << 8) | ((data & 0x000000FF) << 24);
-    glue(stl, MEMSUFFIX)(EA, tmp);
-}
-
-#if defined(TARGET_PPC64) || defined(TARGET_PPCEMB)
-static always_inline void glue(st64r, MEMSUFFIX) (target_ulong EA,
-                                                  uint64_t data)
-{
-    uint64_t tmp = ((data & 0xFF00000000000000ULL) >> 56) |
-        ((data & 0x00FF000000000000ULL) >> 40) |
-        ((data & 0x0000FF0000000000ULL) >> 24) |
-        ((data & 0x000000FF00000000ULL) >> 8) |
-        ((data & 0x00000000FF000000ULL) << 8) |
-        ((data & 0x0000000000FF0000ULL) << 24) |
-        ((data & 0x000000000000FF00ULL) << 40) |
-        ((data & 0x00000000000000FFULL) << 56);
-    glue(stq, MEMSUFFIX)(EA, tmp);
-}
-#endif
-
 /***                             Integer load                              ***/
 #define PPC_LD_OP(name, op)                                                   \
 void OPPROTO glue(glue(op_l, name), MEMSUFFIX) (void)                         \
@@ -130,35 +51,37 @@ void OPPROTO glue(glue(glue(op_st, name)
 }
 #endif
 
+/* Native-endian fixed-point memory loads                                    */
 PPC_LD_OP(bz, ldub);
 PPC_LD_OP(ha, ldsw);
 PPC_LD_OP(hz, lduw);
-PPC_LD_OP(wz, ldl);
+PPC_LD_OP(wz, ldul);
 #if defined(TARGET_PPC64)
-PPC_LD_OP(d, ldq);
+PPC_LD_OP(d, lduq);
 PPC_LD_OP(wa, ldsl);
-PPC_LD_OP_64(d, ldq);
+PPC_LD_OP_64(d, lduq);
 PPC_LD_OP_64(wa, ldsl);
 PPC_LD_OP_64(bz, ldub);
 PPC_LD_OP_64(ha, ldsw);
 PPC_LD_OP_64(hz, lduw);
-PPC_LD_OP_64(wz, ldl);
+PPC_LD_OP_64(wz, ldul);
 #endif
 
-PPC_LD_OP(ha_le, ld16rs);
-PPC_LD_OP(hz_le, ld16r);
-PPC_LD_OP(wz_le, ld32r);
+/* Reverse-endian fixed-point memory loads                                   */
+PPC_LD_OP(ha_le, ldswr);
+PPC_LD_OP(hz_le, lduwr);
+PPC_LD_OP(wz_le, ldulr);
 #if defined(TARGET_PPC64)
-PPC_LD_OP(d_le, ld64r);
-PPC_LD_OP(wa_le, ld32rs);
-PPC_LD_OP_64(d_le, ld64r);
-PPC_LD_OP_64(wa_le, ld32rs);
-PPC_LD_OP_64(ha_le, ld16rs);
-PPC_LD_OP_64(hz_le, ld16r);
-PPC_LD_OP_64(wz_le, ld32r);
+PPC_LD_OP(d_le, lduqr);
+PPC_LD_OP(wa_le, ldslr);
+PPC_LD_OP_64(d_le, lduqr);
+PPC_LD_OP_64(wa_le, ldslr);
+PPC_LD_OP_64(ha_le, ldswr);
+PPC_LD_OP_64(hz_le, lduwr);
+PPC_LD_OP_64(wz_le, ldulr);
 #endif
 
-/***                              Integer store                            ***/
+/* Native-endian fixed-point memory stores                                   */
 PPC_ST_OP(b, stb);
 PPC_ST_OP(h, stw);
 PPC_ST_OP(w, stl);
@@ -170,120 +93,110 @@ PPC_ST_OP_64(h, stw);
 PPC_ST_OP_64(w, stl);
 #endif
 
-PPC_ST_OP(h_le, st16r);
-PPC_ST_OP(w_le, st32r);
+/* Reverse-endian fixed-point memory stores                                  */
+PPC_ST_OP(h_le, stwr);
+PPC_ST_OP(w_le, stlr);
 #if defined(TARGET_PPC64)
-PPC_ST_OP(d_le, st64r);
-PPC_ST_OP_64(d_le, st64r);
-PPC_ST_OP_64(h_le, st16r);
-PPC_ST_OP_64(w_le, st32r);
+PPC_ST_OP(d_le, stqr);
+PPC_ST_OP_64(d_le, stqr);
+PPC_ST_OP_64(h_le, stwr);
+PPC_ST_OP_64(w_le, stlr);
 #endif
 
-/***                Integer load and store with byte reverse               ***/
-PPC_LD_OP(hbr, ld16r);
-PPC_LD_OP(wbr, ld32r);
-PPC_ST_OP(hbr, st16r);
-PPC_ST_OP(wbr, st32r);
+/* Native-endian fixed-point loads and stores with byte-reverse              */
+PPC_LD_OP(hbr, lduwr);
+PPC_LD_OP(wbr, ldulr);
+PPC_ST_OP(hbr, stwr);
+PPC_ST_OP(wbr, stlr);
 #if defined(TARGET_PPC64)
-PPC_LD_OP_64(hbr, ld16r);
-PPC_LD_OP_64(wbr, ld32r);
-PPC_ST_OP_64(hbr, st16r);
-PPC_ST_OP_64(wbr, st32r);
+PPC_LD_OP_64(hbr, lduwr);
+PPC_LD_OP_64(wbr, ldulr);
+PPC_ST_OP_64(hbr, stwr);
+PPC_ST_OP_64(wbr, stlr);
 #endif
 
+/* Reverse-endian fixed-point loads and stores with byte-reverse             */
 PPC_LD_OP(hbr_le, lduw);
-PPC_LD_OP(wbr_le, ldl);
+PPC_LD_OP(wbr_le, ldul);
 PPC_ST_OP(hbr_le, stw);
 PPC_ST_OP(wbr_le, stl);
 #if defined(TARGET_PPC64)
 PPC_LD_OP_64(hbr_le, lduw);
-PPC_LD_OP_64(wbr_le, ldl);
+PPC_LD_OP_64(wbr_le, ldul);
 PPC_ST_OP_64(hbr_le, stw);
 PPC_ST_OP_64(wbr_le, stl);
 #endif
 
-/***                    Integer load and store multiple                    ***/
+/* Native-endian fixed-point loads and stores multiple                       */
 void OPPROTO glue(op_lmw, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_lmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_lmw_64, MEMSUFFIX) (void)
-{
-    glue(do_lmw_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
-void OPPROTO glue(op_lmw_le, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw, MEMSUFFIX) (void)
 {
-    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_stmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_lmw_le_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_lmw_64, MEMSUFFIX) (void)
 {
-    glue(do_lmw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_lmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
-#endif
 
-void OPPROTO glue(op_stmw, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw_64, MEMSUFFIX) (void)
 {
     glue(do_stmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
+#endif
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_stmw_64, MEMSUFFIX) (void)
+/* Reverse-endian fixed-point loads and stores multiple                      */
+void OPPROTO glue(op_lmw_le, MEMSUFFIX) (void)
 {
-    glue(do_stmw_64, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
-#endif
 
 void OPPROTO glue(op_stmw_le, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_stmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_stmw_le_64, MEMSUFFIX) (void)
-{
-    glue(do_stmw_le_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
-/***                    Integer load and store strings                     ***/
-void OPPROTO glue(op_lswi, MEMSUFFIX) (void)
+void OPPROTO glue(op_lmw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw, MEMSUFFIX)(PARAM1);
+    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_lswi_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw_64, MEMSUFFIX)(PARAM1);
+    glue(do_stmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
 
-void OPPROTO glue(op_lswi_le, MEMSUFFIX) (void)
+/* Native-endian loads and stores string                                     */
+void OPPROTO glue(op_lswi, MEMSUFFIX) (void)
 {
-    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0; /* truncate address, as the other non-_64 ops do */
+    glue(do_lsw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_lswi_le_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_lswi_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_lsw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
@@ -303,6 +216,7 @@ void OPPROTO glue(op_lswx, MEMSUFFIX) (v
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
+            T0 = (uint32_t)T0;
             glue(do_lsw, MEMSUFFIX)(PARAM1);
         }
     }
@@ -320,13 +234,44 @@ void OPPROTO glue(op_lswx_64, MEMSUFFIX)
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
-            glue(do_lsw_64, MEMSUFFIX)(PARAM1);
+            glue(do_lsw, MEMSUFFIX)(PARAM1);
         }
     }
     RETURN();
 }
 #endif
 
+void OPPROTO glue(op_stsw, MEMSUFFIX) (void)
+{
+    T0 = (uint32_t)T0;
+    glue(do_stsw, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+
+#if defined(TARGET_PPC64)
+void OPPROTO glue(op_stsw_64, MEMSUFFIX) (void)
+{
+    glue(do_stsw, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+#endif
+
+/* Reverse-endian loads and stores string                                    */
+void OPPROTO glue(op_lswi_le, MEMSUFFIX) (void)
+{
+    T0 = (uint32_t)T0;
+    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+
+#if defined(TARGET_PPC64)
+void OPPROTO glue(op_lswi_le_64, MEMSUFFIX) (void)
+{
+    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+#endif
+
 void OPPROTO glue(op_lswx_le, MEMSUFFIX) (void)
 {
     /* Note: T1 comes from xer_bc then no cast is needed */
@@ -337,6 +282,7 @@ void OPPROTO glue(op_lswx_le, MEMSUFFIX)
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
+            T0 = (uint32_t)T0;
             glue(do_lsw_le, MEMSUFFIX)(PARAM1);
         }
     }
@@ -354,29 +300,16 @@ void OPPROTO glue(op_lswx_le_64, MEMSUFF
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
-            glue(do_lsw_le_64, MEMSUFFIX)(PARAM1);
+            glue(do_lsw_le, MEMSUFFIX)(PARAM1);
         }
     }
     RETURN();
 }
 #endif
 
-void OPPROTO glue(op_stsw, MEMSUFFIX) (void)
-{
-    glue(do_stsw, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_stsw_64, MEMSUFFIX) (void)
-{
-    glue(do_stsw_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
 void OPPROTO glue(op_stsw_le, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_stsw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
@@ -384,7 +317,7 @@ void OPPROTO glue(op_stsw_le, MEMSUFFIX)
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_stsw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_stsw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_stsw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
@@ -432,38 +365,9 @@ PPC_STF_OP_64(fs, stfs);
 PPC_STF_OP_64(fiwx, stfiwx);
 #endif
 
-static always_inline void glue(stfqr, MEMSUFFIX) (target_ulong EA, double d)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = d;
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-    glue(stfq, MEMSUFFIX)(EA, u.d);
-}
-
 static always_inline void glue(stfsr, MEMSUFFIX) (target_ulong EA, double d)
 {
-    union {
-        float f;
-        uint32_t u;
-    } u;
-
-    u.f = float64_to_float32(d, &env->fp_status);
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-    glue(stfl, MEMSUFFIX)(EA, u.f);
+    glue(stflr, MEMSUFFIX)(EA, float64_to_float32(d, &env->fp_status));
 }
 
 static always_inline void glue(stfiwxr, MEMSUFFIX) (target_ulong EA, double d)
@@ -475,11 +379,7 @@ static always_inline void glue(stfiwxr, 
 
     /* Store the low order 32 bits without any conversion */
     u.d = d;
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-    glue(stl, MEMSUFFIX)(EA, u.u);
+    glue(stlr, MEMSUFFIX)(EA, u.u);
 }
 
 PPC_STF_OP(fd_le, stfqr);
@@ -520,40 +420,9 @@ PPC_LDF_OP_64(fd, ldfq);
 PPC_LDF_OP_64(fs, ldfs);
 #endif
 
-static always_inline double glue(ldfqr, MEMSUFFIX) (target_ulong EA)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = glue(ldfq, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-
-    return u.d;
-}
-
 static always_inline double glue(ldfsr, MEMSUFFIX) (target_ulong EA)
 {
-    union {
-        float f;
-        uint32_t u;
-    } u;
-
-    u.f = glue(ldfl, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-
-    return float32_to_float64(u.f, &env->fp_status);
+    return float32_to_float64(glue(ldflr, MEMSUFFIX)(EA), &env->fp_status);
 }
 
 PPC_LDF_OP(fd_le, ldfqr);
@@ -569,7 +438,7 @@ void OPPROTO glue(op_lwarx, MEMSUFFIX) (
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ldl, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldul, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -581,7 +450,7 @@ void OPPROTO glue(op_lwarx_64, MEMSUFFIX
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldul, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -592,7 +461,7 @@ void OPPROTO glue(op_ldarx, MEMSUFFIX) (
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ldq, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(lduq, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -603,7 +472,7 @@ void OPPROTO glue(op_ldarx_64, MEMSUFFIX
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ldq, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(lduq, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -615,7 +484,7 @@ void OPPROTO glue(op_lwarx_le, MEMSUFFIX
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldulr, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -627,7 +496,7 @@ void OPPROTO glue(op_lwarx_le_64, MEMSUF
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldulr, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -638,7 +507,7 @@ void OPPROTO glue(op_ldarx_le, MEMSUFFIX
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld64r, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(lduqr, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -649,7 +518,7 @@ void OPPROTO glue(op_ldarx_le_64, MEMSUF
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld64r, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(lduqr, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -731,7 +600,7 @@ void OPPROTO glue(op_stwcx_le, MEMSUFFIX
         if (unlikely(env->reserve != (uint32_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st32r, MEMSUFFIX)((uint32_t)T0, T1);
+            glue(stlr, MEMSUFFIX)((uint32_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -748,7 +617,7 @@ void OPPROTO glue(op_stwcx_le_64, MEMSUF
         if (unlikely(env->reserve != (uint64_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st32r, MEMSUFFIX)((uint64_t)T0, T1);
+            glue(stlr, MEMSUFFIX)((uint64_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -764,7 +633,7 @@ void OPPROTO glue(op_stdcx_le, MEMSUFFIX
         if (unlikely(env->reserve != (uint32_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st64r, MEMSUFFIX)((uint32_t)T0, T1);
+            glue(stqr, MEMSUFFIX)((uint32_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -780,7 +649,7 @@ void OPPROTO glue(op_stdcx_le_64, MEMSUF
         if (unlikely(env->reserve != (uint64_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st64r, MEMSUFFIX)((uint64_t)T0, T1);
+            glue(stqr, MEMSUFFIX)((uint64_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -862,6 +731,7 @@ void OPPROTO glue(op_dcbz_l128, MEMSUFFI
 
 void OPPROTO glue(op_dcbz, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_dcbz, MEMSUFFIX)();
     RETURN();
 }
@@ -940,7 +810,7 @@ void OPPROTO glue(op_dcbz_l128_64, MEMSU
 
 void OPPROTO glue(op_dcbz_64, MEMSUFFIX) (void)
 {
-    glue(do_dcbz_64, MEMSUFFIX)();
+    glue(do_dcbz, MEMSUFFIX)();
     RETURN();
 }
 #endif
@@ -948,6 +818,7 @@ void OPPROTO glue(op_dcbz_64, MEMSUFFIX)
 /* Instruction cache block invalidate */
 void OPPROTO glue(op_icbi, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_icbi, MEMSUFFIX)();
     RETURN();
 }
@@ -955,7 +826,7 @@ void OPPROTO glue(op_icbi, MEMSUFFIX) (v
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_icbi_64, MEMSUFFIX) (void)
 {
-    glue(do_icbi_64, MEMSUFFIX)();
+    glue(do_icbi, MEMSUFFIX)();
     RETURN();
 }
 #endif
@@ -963,14 +834,14 @@ void OPPROTO glue(op_icbi_64, MEMSUFFIX)
 /* External access */
 void OPPROTO glue(op_eciwx, MEMSUFFIX) (void)
 {
-    T1 = glue(ldl, MEMSUFFIX)((uint32_t)T0);
+    T1 = glue(ldul, MEMSUFFIX)((uint32_t)T0);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_eciwx_64, MEMSUFFIX) (void)
 {
-    T1 = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+    T1 = glue(ldul, MEMSUFFIX)((uint64_t)T0);
     RETURN();
 }
 #endif
@@ -991,28 +862,28 @@ void OPPROTO glue(op_ecowx_64, MEMSUFFIX
 
 void OPPROTO glue(op_eciwx_le, MEMSUFFIX) (void)
 {
-    T1 = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
+    T1 = glue(ldulr, MEMSUFFIX)((uint32_t)T0);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_eciwx_le_64, MEMSUFFIX) (void)
 {
-    T1 = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+    T1 = glue(ldulr, MEMSUFFIX)((uint64_t)T0);
     RETURN();
 }
 #endif
 
 void OPPROTO glue(op_ecowx_le, MEMSUFFIX) (void)
 {
-    glue(st32r, MEMSUFFIX)((uint32_t)T0, T1);
+    glue(stlr, MEMSUFFIX)((uint32_t)T0, T1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_ecowx_le_64, MEMSUFFIX) (void)
 {
-    glue(st32r, MEMSUFFIX)((uint64_t)T0, T1);
+    glue(stlr, MEMSUFFIX)((uint64_t)T0, T1);
     RETURN();
 }
 #endif
@@ -1064,14 +935,14 @@ void OPPROTO glue(op_POWER2_stfq_le, MEM
 #endif
 void OPPROTO glue(op_vr_lvx, MEMSUFFIX) (void)
 {
-    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint32_t)T0);
-    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint32_t)T0 + 8);
+    AVR0.u64[VR_DWORD0] = glue(lduq, MEMSUFFIX)((uint32_t)T0);
+    AVR0.u64[VR_DWORD1] = glue(lduq, MEMSUFFIX)((uint32_t)T0 + 8);
 }
 
 void OPPROTO glue(op_vr_lvx_le, MEMSUFFIX) (void)
 {
-    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint32_t)T0);
-    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint32_t)T0 + 8);
+    AVR0.u64[VR_DWORD1] = glue(lduqr, MEMSUFFIX)((uint32_t)T0);
+    AVR0.u64[VR_DWORD0] = glue(lduqr, MEMSUFFIX)((uint32_t)T0 + 8);
 }
 
 void OPPROTO glue(op_vr_stvx, MEMSUFFIX) (void)
@@ -1082,21 +953,21 @@ void OPPROTO glue(op_vr_stvx, MEMSUFFIX)
 
 void OPPROTO glue(op_vr_stvx_le, MEMSUFFIX) (void)
 {
-    glue(stq, MEMSUFFIX)((uint32_t)T0, AVR0.u64[VR_DWORD1]);
-    glue(stq, MEMSUFFIX)((uint32_t)T0 + 8, AVR0.u64[VR_DWORD0]);
+    glue(stqr, MEMSUFFIX)((uint32_t)T0, AVR0.u64[VR_DWORD1]);
+    glue(stqr, MEMSUFFIX)((uint32_t)T0 + 8, AVR0.u64[VR_DWORD0]);
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_vr_lvx_64, MEMSUFFIX) (void)
 {
-    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint64_t)T0);
-    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint64_t)T0 + 8);
+    AVR0.u64[VR_DWORD0] = glue(lduq, MEMSUFFIX)((uint64_t)T0);
+    AVR0.u64[VR_DWORD1] = glue(lduq, MEMSUFFIX)((uint64_t)T0 + 8);
 }
 
 void OPPROTO glue(op_vr_lvx_le_64, MEMSUFFIX) (void)
 {
-    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint64_t)T0);
-    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint64_t)T0 + 8);
+    AVR0.u64[VR_DWORD1] = glue(lduqr, MEMSUFFIX)((uint64_t)T0);
+    AVR0.u64[VR_DWORD0] = glue(lduqr, MEMSUFFIX)((uint64_t)T0 + 8);
 }
 
 void OPPROTO glue(op_vr_stvx_64, MEMSUFFIX) (void)
@@ -1107,8 +978,8 @@ void OPPROTO glue(op_vr_stvx_64, MEMSUFF
 
 void OPPROTO glue(op_vr_stvx_le_64, MEMSUFFIX) (void)
 {
-    glue(stq, MEMSUFFIX)((uint64_t)T0, AVR0.u64[VR_DWORD1]);
-    glue(stq, MEMSUFFIX)((uint64_t)T0 + 8, AVR0.u64[VR_DWORD0]);
+    glue(stqr, MEMSUFFIX)((uint64_t)T0, AVR0.u64[VR_DWORD1]);
+    glue(stqr, MEMSUFFIX)((uint64_t)T0 + 8, AVR0.u64[VR_DWORD0]);
 }
 #endif
 #undef VR_DWORD0
@@ -1161,16 +1032,16 @@ _PPC_SPE_ST_OP(name, op)
 #endif
 
 #if !defined(TARGET_PPC64)
-PPC_SPE_LD_OP(dd, ldq);
+PPC_SPE_LD_OP(dd, lduq);
 PPC_SPE_ST_OP(dd, stq);
-PPC_SPE_LD_OP(dd_le, ld64r);
-PPC_SPE_ST_OP(dd_le, st64r);
+PPC_SPE_LD_OP(dd_le, lduqr);
+PPC_SPE_ST_OP(dd_le, stqr);
 #endif
 static always_inline uint64_t glue(spe_ldw, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ldl, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ldl, MEMSUFFIX)(EA + 4);
+    ret = (uint64_t)glue(ldul, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(ldul, MEMSUFFIX)(EA + 4);
     return ret;
 }
 PPC_SPE_LD_OP(dw, spe_ldw);
@@ -1184,16 +1055,16 @@ PPC_SPE_ST_OP(dw, spe_stdw);
 static always_inline uint64_t glue(spe_ldw_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld32r, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ld32r, MEMSUFFIX)(EA + 4);
+    ret = (uint64_t)glue(ldulr, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(ldulr, MEMSUFFIX)(EA + 4);
     return ret;
 }
 PPC_SPE_LD_OP(dw_le, spe_ldw_le);
 static always_inline void glue(spe_stdw_le, MEMSUFFIX) (target_ulong EA,
                                                         uint64_t data)
 {
-    glue(st32r, MEMSUFFIX)(EA, data >> 32);
-    glue(st32r, MEMSUFFIX)(EA + 4, data);
+    glue(stlr, MEMSUFFIX)(EA, data >> 32);
+    glue(stlr, MEMSUFFIX)(EA + 4, data);
 }
 PPC_SPE_ST_OP(dw_le, spe_stdw_le);
 static always_inline uint64_t glue(spe_ldh, MEMSUFFIX) (target_ulong EA)
@@ -1218,20 +1089,20 @@ PPC_SPE_ST_OP(dh, spe_stdh);
 static always_inline uint64_t glue(spe_ldh_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 48;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2) << 32;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 4) << 16;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 6);
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 48;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2) << 32;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 4) << 16;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 6);
     return ret;
 }
 PPC_SPE_LD_OP(dh_le, spe_ldh_le);
 static always_inline void glue(spe_stdh_le, MEMSUFFIX) (target_ulong EA,
                                                         uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 48);
-    glue(st16r, MEMSUFFIX)(EA + 2, data >> 32);
-    glue(st16r, MEMSUFFIX)(EA + 4, data >> 16);
-    glue(st16r, MEMSUFFIX)(EA + 6, data);
+    glue(stwr, MEMSUFFIX)(EA, data >> 48);
+    glue(stwr, MEMSUFFIX)(EA + 2, data >> 32);
+    glue(stwr, MEMSUFFIX)(EA + 4, data >> 16);
+    glue(stwr, MEMSUFFIX)(EA + 6, data);
 }
 PPC_SPE_ST_OP(dh_le, spe_stdh_le);
 static always_inline uint64_t glue(spe_lwhe, MEMSUFFIX) (target_ulong EA)
@@ -1252,16 +1123,16 @@ PPC_SPE_ST_OP(whe, spe_stwhe);
 static always_inline uint64_t glue(spe_lwhe_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 48;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2) << 16;
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 48;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2) << 16;
     return ret;
 }
 PPC_SPE_LD_OP(whe_le, spe_lwhe_le);
 static always_inline void glue(spe_stwhe_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 48);
-    glue(st16r, MEMSUFFIX)(EA + 2, data >> 16);
+    glue(stwr, MEMSUFFIX)(EA, data >> 48);
+    glue(stwr, MEMSUFFIX)(EA + 2, data >> 16);
 }
 PPC_SPE_ST_OP(whe_le, spe_stwhe_le);
 static always_inline uint64_t glue(spe_lwhou, MEMSUFFIX) (target_ulong EA)
@@ -1290,24 +1161,24 @@ PPC_SPE_ST_OP(who, spe_stwho);
 static always_inline uint64_t glue(spe_lwhou_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2);
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2);
     return ret;
 }
 PPC_SPE_LD_OP(whou_le, spe_lwhou_le);
 static always_inline uint64_t glue(spe_lwhos_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = ((uint64_t)((int32_t)glue(ld16rs, MEMSUFFIX)(EA))) << 32;
-    ret |= (uint64_t)((int32_t)glue(ld16rs, MEMSUFFIX)(EA + 2));
+    ret = ((uint64_t)((int32_t)glue(ldswr, MEMSUFFIX)(EA))) << 32;
+    ret |= (uint64_t)((int32_t)glue(ldswr, MEMSUFFIX)(EA + 2));
     return ret;
 }
 PPC_SPE_LD_OP(whos_le, spe_lwhos_le);
 static always_inline void glue(spe_stwho_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 32);
-    glue(st16r, MEMSUFFIX)(EA + 2, data);
+    glue(stwr, MEMSUFFIX)(EA, data >> 32);
+    glue(stwr, MEMSUFFIX)(EA + 2, data);
 }
 PPC_SPE_ST_OP(who_le, spe_stwho_le);
 #if !defined(TARGET_PPC64)
@@ -1320,7 +1191,7 @@ PPC_SPE_ST_OP(wwo, spe_stwwo);
 static always_inline void glue(spe_stwwo_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st32r, MEMSUFFIX)(EA, data);
+    glue(stlr, MEMSUFFIX)(EA, data);
 }
 PPC_SPE_ST_OP(wwo_le, spe_stwwo_le);
 #endif
@@ -1334,14 +1205,14 @@ PPC_SPE_LD_OP(h, spe_lh);
 static always_inline uint64_t glue(spe_lh_le, MEMSUFFIX) (target_ulong EA)
 {
     uint16_t tmp;
-    tmp = glue(ld16r, MEMSUFFIX)(EA);
+    tmp = glue(lduwr, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 48) | ((uint64_t)tmp << 16);
 }
 PPC_SPE_LD_OP(h_le, spe_lh_le);
 static always_inline uint64_t glue(spe_lwwsplat, MEMSUFFIX) (target_ulong EA)
 {
     uint32_t tmp;
-    tmp = glue(ldl, MEMSUFFIX)(EA);
+    tmp = glue(ldul, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 32) | (uint64_t)tmp;
 }
 PPC_SPE_LD_OP(wwsplat, spe_lwwsplat);
@@ -1349,7 +1220,7 @@ static always_inline
 uint64_t glue(spe_lwwsplat_le, MEMSUFFIX) (target_ulong EA)
 {
     uint32_t tmp;
-    tmp = glue(ld32r, MEMSUFFIX)(EA);
+    tmp = glue(ldulr, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 32) | (uint64_t)tmp;
 }
 PPC_SPE_LD_OP(wwsplat_le, spe_lwwsplat_le);
@@ -1369,9 +1240,9 @@ uint64_t glue(spe_lwhsplat_le, MEMSUFFIX
 {
     uint64_t ret;
     uint16_t tmp;
-    tmp = glue(ld16r, MEMSUFFIX)(EA);
+    tmp = glue(lduwr, MEMSUFFIX)(EA);
     ret = ((uint64_t)tmp << 48) | ((uint64_t)tmp << 32);
-    tmp = glue(ld16r, MEMSUFFIX)(EA + 2);
+    tmp = glue(lduwr, MEMSUFFIX)(EA + 2);
     ret |= ((uint64_t)tmp << 16) | (uint64_t)tmp;
     return ret;
 }
Index: target-ppc/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/translate.c,v
retrieving revision 1.93
diff -u -d -d -p -r1.93 translate.c
--- target-ppc/translate.c	14 Oct 2007 07:07:07 -0000	1.93
+++ target-ppc/translate.c	16 Oct 2007 11:39:08 -0000
@@ -6756,7 +6756,7 @@ static always_inline int gen_intermediat
                     ctx.nip, 1 - msr_pr, msr_ir);
         }
 #endif
-        ctx.opcode = ldl_code(ctx.nip);
+        ctx.opcode = ldul_code(ctx.nip);
         if (msr_le) {
             ctx.opcode = ((ctx.opcode & 0xFF000000) >> 24) |
                 ((ctx.opcode & 0x00FF0000) >> 8) |
Index: target-sh4/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-sh4/exec.h,v
retrieving revision 1.6
diff -u -d -d -p -r1.6 exec.h
--- target-sh4/exec.h	14 Oct 2007 07:07:08 -0000	1.6
+++ target-sh4/exec.h	16 Oct 2007 11:39:08 -0000
@@ -48,6 +48,9 @@ static inline int cpu_halted(CPUState *e
 
 #ifndef CONFIG_USER_ONLY
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 #define RETURN() __asm__ __volatile__("")
Index: target-sh4/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-sh4/op_helper.c,v
retrieving revision 1.5
diff -u -d -d -p -r1.5 op_helper.c
--- target-sh4/op_helper.c	14 Oct 2007 07:07:08 -0000	1.5
+++ target-sh4/op_helper.c	16 Oct 2007 11:39:08 -0000
@@ -30,6 +30,21 @@ void do_raise_exception(void)
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -41,6 +56,7 @@ void do_raise_exception(void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 void tlb_fill(target_ulong addr, int is_write, int mmu_idx, void *retaddr)
 {
Index: target-sh4/op_mem.c
===================================================================
RCS file: /sources/qemu/qemu/target-sh4/op_mem.c,v
retrieving revision 1.3
diff -u -d -d -p -r1.3 op_mem.c
--- target-sh4/op_mem.c	16 Sep 2007 21:08:05 -0000	1.3
+++ target-sh4/op_mem.c	16 Oct 2007 11:39:08 -0000
@@ -48,7 +48,7 @@ void glue(op_stw_T0_T1, MEMSUFFIX) (void
 }
 
 void glue(op_ldl_T0_T0, MEMSUFFIX) (void) {
-    T0 = glue(ldl, MEMSUFFIX) (T0);
+    T0 = glue(ldul, MEMSUFFIX) (T0);
     RETURN();
 }
 
Index: target-sparc/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/exec.h,v
retrieving revision 1.22
diff -u -d -d -p -r1.22 exec.h
--- target-sparc/exec.h	14 Oct 2007 07:07:08 -0000	1.22
+++ target-sparc/exec.h	16 Oct 2007 11:39:08 -0000
@@ -100,6 +100,9 @@ void do_rdpsr();
 /* XXX: move that to a generic header */
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 static inline void env_to_regs(void)
Index: target-sparc/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/helper.c,v
retrieving revision 1.28
diff -u -d -d -p -r1.28 helper.c
--- target-sparc/helper.c	14 Oct 2007 07:07:08 -0000	1.28
+++ target-sparc/helper.c	16 Oct 2007 11:39:08 -0000
@@ -130,7 +130,7 @@ int get_physical_address (CPUState *env,
     /* SPARC reference MMU table walk: Context table->L1->L2->PTE */
     /* Context base + context number */
     pde_ptr = (env->mmuregs[1] << 4) + (env->mmuregs[2] << 2);
-    pde = ldl_phys(pde_ptr);
+    pde = ldul_phys(pde_ptr);
 
     /* Ctx pde */
     switch (pde & PTE_ENTRYTYPE_MASK) {
@@ -142,7 +142,7 @@ int get_physical_address (CPUState *env,
         return 4 << 2;
     case 1: /* L0 PDE */
         pde_ptr = ((address >> 22) & ~3) + ((pde & ~3) << 4);
-        pde = ldl_phys(pde_ptr);
+        pde = ldul_phys(pde_ptr);
 
         switch (pde & PTE_ENTRYTYPE_MASK) {
         default:
@@ -152,7 +152,7 @@ int get_physical_address (CPUState *env,
             return (1 << 8) | (4 << 2);
         case 1: /* L1 PDE */
             pde_ptr = ((address & 0xfc0000) >> 16) + ((pde & ~3) << 4);
-            pde = ldl_phys(pde_ptr);
+            pde = ldul_phys(pde_ptr);
 
             switch (pde & PTE_ENTRYTYPE_MASK) {
             default:
@@ -162,7 +162,7 @@ int get_physical_address (CPUState *env,
                 return (2 << 8) | (4 << 2);
             case 1: /* L2 PDE */
                 pde_ptr = ((address & 0x3f000) >> 10) + ((pde & ~3) << 4);
-                pde = ldl_phys(pde_ptr);
+                pde = ldul_phys(pde_ptr);
 
                 switch (pde & PTE_ENTRYTYPE_MASK) {
                 default:
@@ -266,7 +266,7 @@ target_ulong mmu_probe(CPUState *env, ta
     /* Context base + context number */
     pde_ptr = (target_phys_addr_t)(env->mmuregs[1] << 4) +
         (env->mmuregs[2] << 2);
-    pde = ldl_phys(pde_ptr);
+    pde = ldul_phys(pde_ptr);
 
     switch (pde & PTE_ENTRYTYPE_MASK) {
     default:
@@ -278,7 +278,7 @@ target_ulong mmu_probe(CPUState *env, ta
         if (mmulev == 3)
             return pde;
         pde_ptr = ((address >> 22) & ~3) + ((pde & ~3) << 4);
-        pde = ldl_phys(pde_ptr);
+        pde = ldul_phys(pde_ptr);
 
         switch (pde & PTE_ENTRYTYPE_MASK) {
         default:
@@ -291,7 +291,7 @@ target_ulong mmu_probe(CPUState *env, ta
             if (mmulev == 2)
                 return pde;
             pde_ptr = ((address & 0xfc0000) >> 16) + ((pde & ~3) << 4);
-            pde = ldl_phys(pde_ptr);
+            pde = ldul_phys(pde_ptr);
 
             switch (pde & PTE_ENTRYTYPE_MASK) {
             default:
@@ -304,7 +304,7 @@ target_ulong mmu_probe(CPUState *env, ta
                 if (mmulev == 1)
                     return pde;
                 pde_ptr = ((address & 0x3f000) >> 10) + ((pde & ~3) << 4);
-                pde = ldl_phys(pde_ptr);
+                pde = ldul_phys(pde_ptr);
 
                 switch (pde & PTE_ENTRYTYPE_MASK) {
                 default:
@@ -331,7 +331,7 @@ void dump_mmu(CPUState *env)
 
     printf("MMU dump:\n");
     pde_ptr = (env->mmuregs[1] << 4) + (env->mmuregs[2] << 2);
-    pde = ldl_phys(pde_ptr);
+    pde = ldul_phys(pde_ptr);
     printf("Root ptr: " TARGET_FMT_plx ", ctx: %d\n",
            (target_phys_addr_t)env->mmuregs[1] << 4, env->mmuregs[2]);
     for (n = 0, va = 0; n < 256; n++, va += 16 * 1024 * 1024) {
Index: target-sparc/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/op_helper.c,v
retrieving revision 1.42
diff -u -d -d -p -r1.42 op_helper.c
--- target-sparc/op_helper.c	14 Oct 2007 07:07:08 -0000	1.42
+++ target-sparc/op_helper.c	16 Oct 2007 11:39:08 -0000
@@ -184,11 +184,11 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_code(T0 & ~3);
+            ret = ldul_code(T0 & ~3);
             break;
         case 8:
-            ret = ldl_code(T0 & ~3);
-            T0 = ldl_code((T0 + 4) & ~3);
+            ret = ldul_code(T0 & ~3);
+            T0 = ldul_code((T0 + 4) & ~3);
             break;
         }
         break;
@@ -202,11 +202,11 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_user(T0 & ~3);
+            ret = ldul_user(T0 & ~3);
             break;
         case 8:
-            ret = ldl_user(T0 & ~3);
-            T0 = ldl_user((T0 + 4) & ~3);
+            ret = ldul_user(T0 & ~3);
+            T0 = ldul_user((T0 + 4) & ~3);
             break;
         }
         break;
@@ -220,11 +220,11 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_kernel(T0 & ~3);
+            ret = ldul_kernel(T0 & ~3);
             break;
         case 8:
-            ret = ldl_kernel(T0 & ~3);
-            T0 = ldl_kernel((T0 + 4) & ~3);
+            ret = ldul_kernel(T0 & ~3);
+            T0 = ldul_kernel((T0 + 4) & ~3);
             break;
         }
         break;
@@ -243,11 +243,11 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_phys(T0 & ~3);
+            ret = ldul_phys(T0 & ~3);
             break;
         case 8:
-            ret = ldl_phys(T0 & ~3);
-            T0 = ldl_phys((T0 + 4) & ~3);
+            ret = ldul_phys(T0 & ~3);
+            T0 = ldul_phys((T0 + 4) & ~3);
             break;
         }
         break;
@@ -264,13 +264,13 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_phys((target_phys_addr_t)(T0 & ~3)
-                           | ((target_phys_addr_t)(asi & 0xf) << 32));
+            ret = ldul_phys((target_phys_addr_t)(T0 & ~3)
+                            | ((target_phys_addr_t)(asi & 0xf) << 32));
             break;
         case 8:
-            ret = ldl_phys((target_phys_addr_t)(T0 & ~3)
-                           | ((target_phys_addr_t)(asi & 0xf) << 32));
-            T0 = ldl_phys((target_phys_addr_t)((T0 + 4) & ~3)
+            ret = ldul_phys((target_phys_addr_t)(T0 & ~3)
+                            | ((target_phys_addr_t)(asi & 0xf) << 32));
+            T0 = ldul_phys((target_phys_addr_t)((T0 + 4) & ~3)
                            | ((target_phys_addr_t)(asi & 0xf) << 32));
             break;
         }
@@ -422,7 +422,7 @@ void helper_st_asi(int asi, int size)
             uint32_t src = T1 & ~3, dst = T0 & ~3, temp;
 
             for (i = 0; i < 32; i += 4, src += 4, dst += 4) {
-                temp = ldl_kernel(src);
+                temp = ldul_kernel(src);
                 stl_kernel(dst, temp);
             }
         }
@@ -514,6 +514,24 @@ void helper_ld_asi(int asi, int size, in
     switch (asi) {
     case 0x80: // Primary
     case 0x82: // Primary no-fault
+        {
+            switch(size) {
+            case 1:
+                ret = ldub_raw(T0);
+                break;
+            case 2:
+                ret = lduw_raw(T0 & ~1);
+                break;
+            case 4:
+                ret = ldul_raw(T0 & ~3);
+                break;
+            default:
+            case 8:
+                ret = lduq_raw(T0 & ~7);
+                break;
+            }
+        }
+        break;
     case 0x88: // Primary LE
     case 0x8a: // Primary no-fault LE
         {
@@ -522,14 +540,14 @@ void helper_ld_asi(int asi, int size, in
                 ret = ldub_raw(T0);
                 break;
             case 2:
-                ret = lduw_raw(T0 & ~1);
+                ret = lduwr_raw(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_raw(T0 & ~3);
+                ret = ldulr_raw(T0 & ~3);
                 break;
             default:
             case 8:
-                ret = ldq_raw(T0 & ~7);
+                ret = lduqr_raw(T0 & ~7);
                 break;
             }
         }
@@ -544,29 +562,6 @@ void helper_ld_asi(int asi, int size, in
         break;
     }
 
-    /* Convert from little endian */
-    switch (asi) {
-    case 0x88: // Primary LE
-    case 0x89: // Secondary LE
-    case 0x8a: // Primary no-fault LE
-    case 0x8b: // Secondary no-fault LE
-        switch(size) {
-        case 2:
-            ret = bswap16(ret);
-            break;
-        case 4:
-            ret = bswap32(ret);
-            break;
-        case 8:
-            ret = bswap64(ret);
-            break;
-        default:
-            break;
-        }
-    default:
-        break;
-    }
-
     /* Convert to signed number */
     if (sign) {
         switch(size) {
@@ -591,30 +586,8 @@ void helper_st_asi(int asi, int size)
     if (asi < 0x80)
         raise_exception(TT_PRIV_ACT);
 
-    /* Convert to little endian */
-    switch (asi) {
-    case 0x88: // Primary LE
-    case 0x89: // Secondary LE
-        switch(size) {
-        case 2:
-            T0 = bswap16(T0);
-            break;
-        case 4:
-            T0 = bswap32(T0);
-            break;
-        case 8:
-            T0 = bswap64(T0);
-            break;
-        default:
-            break;
-        }
-    default:
-        break;
-    }
-
     switch(asi) {
     case 0x80: // Primary
-    case 0x88: // Primary LE
         {
             switch(size) {
             case 1:
@@ -633,6 +606,25 @@ void helper_st_asi(int asi, int size)
             }
         }
         break;
+    case 0x88: // Primary LE
+        {
+            switch(size) {
+            case 1:
+                stb_raw(T0, T1);
+                break;
+            case 2:
+                stwr_raw(T0 & ~1, T1);
+                break;
+            case 4:
+                stlr_raw(T0 & ~3, T1);
+                break;
+            case 8:
+            default:
+                stqr_raw(T0 & ~7, T1);
+                break;
+            }
+        }
+        break;
     case 0x81: // Secondary
     case 0x89: // Secondary LE
         // XXX
@@ -659,11 +651,8 @@ void helper_ld_asi(int asi, int size, in
 
     switch (asi) {
     case 0x10: // As if user primary
-    case 0x18: // As if user primary LE
     case 0x80: // Primary
     case 0x82: // Primary no-fault
-    case 0x88: // Primary LE
-    case 0x8a: // Primary no-fault LE
         if ((asi & 0x80) && (env->pstate & PS_PRIV)) {
             switch(size) {
             case 1:
@@ -673,11 +662,11 @@ void helper_ld_asi(int asi, int size, in
                 ret = lduw_kernel(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_kernel(T0 & ~3);
+                ret = ldul_kernel(T0 & ~3);
                 break;
             default:
             case 8:
-                ret = ldq_kernel(T0 & ~7);
+                ret = lduq_kernel(T0 & ~7);
                 break;
             }
         } else {
@@ -689,17 +678,90 @@ void helper_ld_asi(int asi, int size, in
                 ret = lduw_user(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_user(T0 & ~3);
+                ret = ldul_user(T0 & ~3);
                 break;
             default:
             case 8:
-                ret = ldq_user(T0 & ~7);
+                ret = lduq_user(T0 & ~7);
+                break;
+            }
+        }
+        break;
+    case 0x18: // As if user primary LE
+    case 0x88: // Primary LE
+    case 0x8a: // Primary no-fault LE
+        if ((asi & 0x80) && (env->pstate & PS_PRIV)) {
+            if (env->hpstate & HS_PRIV) {
+                switch(size) {
+                case 1:
+                    ret = ldub_hypv(T0);
+                    break;
+                case 2:
+                    ret = lduwr_hypv(T0 & ~1);
+                    break;
+                case 4:
+                    ret = ldulr_hypv(T0 & ~3);
+                    break;
+                default:
+                case 8:
+                    ret = lduqr_hypv(T0 & ~7);
+                    break;
+                }
+            } else {
+                switch(size) {
+                case 1:
+                    ret = ldub_kernel(T0);
+                    break;
+                case 2:
+                    ret = lduwr_kernel(T0 & ~1);
+                    break;
+                case 4:
+                    ret = ldulr_kernel(T0 & ~3);
+                    break;
+                default:
+                case 8:
+                    ret = lduqr_kernel(T0 & ~7);
+                    break;
+                }
+            }
+        } else {
+            switch(size) {
+            case 1:
+                ret = ldub_user(T0);
+                break;
+            case 2:
+                ret = lduwr_user(T0 & ~1);
+                break;
+            case 4:
+                ret = ldulr_user(T0 & ~3);
+                break;
+            default:
+            case 8:
+                ret = lduqr_user(T0 & ~7);
                 break;
             }
         }
         break;
     case 0x14: // Bypass
     case 0x15: // Bypass, non-cacheable
+        {
+            switch(size) {
+            case 1:
+                ret = ldub_phys(T0);
+                break;
+            case 2:
+                ret = lduw_phys(T0 & ~1);
+                break;
+            case 4:
+                ret = ldul_phys(T0 & ~3);
+                break;
+            default:
+            case 8:
+                ret = lduq_phys(T0 & ~7);
+                break;
+            }
+            break;
+        }
     case 0x1c: // Bypass LE
     case 0x1d: // Bypass, non-cacheable LE
         {
@@ -708,14 +770,14 @@ void helper_ld_asi(int asi, int size, in
                 ret = ldub_phys(T0);
                 break;
             case 2:
-                ret = lduw_phys(T0 & ~1);
+                ret = bswap16(lduw_phys(T0 & ~1));
                 break;
             case 4:
-                ret = ldl_phys(T0 & ~3);
+                ret = bswap32(ldul_phys(T0 & ~3));
                 break;
             default:
             case 8:
-                ret = ldq_phys(T0 & ~7);
+                ret = bswap64(lduq_phys(T0 & ~7));
                 break;
             }
             break;
@@ -803,34 +865,6 @@ void helper_ld_asi(int asi, int size, in
         break;
     }
 
-    /* Convert from little endian */
-    switch (asi) {
-    case 0x0c: // Nucleus Little Endian (LE)
-    case 0x18: // As if user primary LE
-    case 0x19: // As if user secondary LE
-    case 0x1c: // Bypass LE
-    case 0x1d: // Bypass, non-cacheable LE
-    case 0x88: // Primary LE
-    case 0x89: // Secondary LE
-    case 0x8a: // Primary no-fault LE
-    case 0x8b: // Secondary no-fault LE
-        switch(size) {
-        case 2:
-            ret = bswap16(ret);
-            break;
-        case 4:
-            ret = bswap32(ret);
-            break;
-        case 8:
-            ret = bswap64(ret);
-            break;
-        default:
-            break;
-        }
-    default:
-        break;
-    }
-
     /* Convert to signed number */
     if (sign) {
         switch(size) {
@@ -855,37 +889,9 @@ void helper_st_asi(int asi, int size)
     if (asi < 0x80 && (env->pstate & PS_PRIV) == 0)
         raise_exception(TT_PRIV_ACT);
 
-    /* Convert to little endian */
-    switch (asi) {
-    case 0x0c: // Nucleus Little Endian (LE)
-    case 0x18: // As if user primary LE
-    case 0x19: // As if user secondary LE
-    case 0x1c: // Bypass LE
-    case 0x1d: // Bypass, non-cacheable LE
-    case 0x88: // Primary LE
-    case 0x89: // Secondary LE
-        switch(size) {
-        case 2:
-            T0 = bswap16(T0);
-            break;
-        case 4:
-            T0 = bswap32(T0);
-            break;
-        case 8:
-            T0 = bswap64(T0);
-            break;
-        default:
-            break;
-        }
-    default:
-        break;
-    }
-
     switch(asi) {
     case 0x10: // As if user primary
-    case 0x18: // As if user primary LE
     case 0x80: // Primary
-    case 0x88: // Primary LE
         if ((asi & 0x80) && (env->pstate & PS_PRIV)) {
             switch(size) {
             case 1:
@@ -920,10 +926,62 @@ void helper_st_asi(int asi, int size)
             }
         }
         break;
+    case 0x18: // As if user primary LE
+    case 0x88: // Primary LE
+        if ((asi & 0x80) && (env->pstate & PS_PRIV)) {
+            if (env->hpstate & HS_PRIV) {
+                switch(size) {
+                case 1:
+                    stb_hypv(T0, T1);
+                    break;
+                case 2:
+                    stwr_hypv(T0 & ~1, T1);
+                    break;
+                case 4:
+                    stlr_hypv(T0 & ~3, T1);
+                    break;
+                case 8:
+                default:
+                    stqr_hypv(T0 & ~7, T1);
+                    break;
+                }
+            } else {
+                switch(size) {
+                case 1:
+                    stb_kernel(T0, T1);
+                    break;
+                case 2:
+                    stwr_kernel(T0 & ~1, T1);
+                    break;
+                case 4:
+                    stlr_kernel(T0 & ~3, T1);
+                    break;
+                case 8:
+                default:
+                    stqr_kernel(T0 & ~7, T1);
+                    break;
+                }
+            }
+        } else {
+            switch(size) {
+            case 1:
+                stb_user(T0, T1);
+                break;
+            case 2:
+                stwr_user(T0 & ~1, T1);
+                break;
+            case 4:
+                stlr_user(T0 & ~3, T1);
+                break;
+            case 8:
+            default:
+                stqr_user(T0 & ~7, T1);
+                break;
+            }
+        }
+        break;
     case 0x14: // Bypass
     case 0x15: // Bypass, non-cacheable
-    case 0x1c: // Bypass LE
-    case 0x1d: // Bypass, non-cacheable LE
         {
             switch(size) {
             case 1:
@@ -942,6 +1000,26 @@ void helper_st_asi(int asi, int size)
             }
         }
         return;
+    case 0x1c: // Bypass LE
+    case 0x1d: // Bypass, non-cacheable LE
+        {
+            switch(size) {
+            case 1:
+                stb_phys(T0, T1);
+                break;
+            case 2:
+                stw_phys(T0 & ~1, bswap16(T1));
+                break;
+            case 4:
+                stl_phys(T0 & ~3, bswap32(T1));
+                break;
+            case 8:
+            default:
+                stq_phys(T0 & ~7, bswap64(T1));
+                break;
+            }
+        }
+        return;
     case 0x04: // Nucleus
     case 0x0c: // Nucleus Little Endian (LE)
     case 0x11: // As if user secondary
@@ -1497,6 +1575,21 @@ static void do_unaligned_access(target_u
 #define ALIGNED_ONLY
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -1508,6 +1601,7 @@ static void do_unaligned_access(target_u
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 static void do_unaligned_access(target_ulong addr, int is_write, int is_user,
                                 void *retaddr)
Index: target-sparc/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/op_mem.h,v
retrieving revision 1.10
diff -u -d -d -p -r1.10 op_mem.h
--- target-sparc/op_mem.h	21 Sep 2007 19:10:53 -0000	1.10
+++ target-sparc/op_mem.h	16 Oct 2007 11:39:08 -0000
@@ -17,7 +17,7 @@ void OPPROTO glue(glue(op_, name), MEMSU
     glue(op, MEMSUFFIX)(T0, T1);                                      \
 }
 
-SPARC_LD_OP(ld, ldl);
+SPARC_LD_OP(ld, ldul);
 SPARC_LD_OP(ldub, ldub);
 SPARC_LD_OP(lduh, lduw);
 SPARC_LD_OP_S(ldsb, ldsb);
@@ -42,15 +42,15 @@ void OPPROTO glue(op_ldstub, MEMSUFFIX)(
 
 void OPPROTO glue(op_swap, MEMSUFFIX)(void)
 {
-    target_ulong tmp = glue(ldl, MEMSUFFIX)(T0);
+    target_ulong tmp = glue(ldul, MEMSUFFIX)(T0);
     glue(stl, MEMSUFFIX)(T0, T1);       /* XXX: Should be Atomically */
     T1 = tmp;
 }
 
 void OPPROTO glue(op_ldd, MEMSUFFIX)(void)
 {
-    T1 = glue(ldl, MEMSUFFIX)(T0);
-    T0 = glue(ldl, MEMSUFFIX)((T0 + 4));
+    T1 = glue(ldul, MEMSUFFIX)(T0);
+    T0 = glue(ldul, MEMSUFFIX)((T0 + 4));
 }
 
 /***                         Floating-point store                          ***/
@@ -76,17 +76,9 @@ void OPPROTO glue(op_lddf, MEMSUFFIX) (v
 }
 
 #ifdef TARGET_SPARC64
-void OPPROTO glue(op_lduw, MEMSUFFIX)(void)
-{
-    T1 = (uint64_t)(glue(ldl, MEMSUFFIX)(T0) & 0xffffffff);
-}
-
-void OPPROTO glue(op_ldsw, MEMSUFFIX)(void)
-{
-    T1 = (int64_t)(glue(ldl, MEMSUFFIX)(T0) & 0xffffffff);
-}
-
-SPARC_LD_OP(ldx, ldq);
+SPARC_LD_OP(lduw, ldul);
+SPARC_LD_OP(ldsw, ldsl);
+SPARC_LD_OP(ldx, lduq);
 SPARC_ST_OP(stx, stq);
 #endif
 #undef MEMSUFFIX
Index: target-sparc/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/translate.c,v
retrieving revision 1.75
diff -u -d -d -p -r1.75 translate.c
--- target-sparc/translate.c	14 Oct 2007 07:07:08 -0000	1.75
+++ target-sparc/translate.c	16 Oct 2007 11:39:08 -0000
@@ -1089,7 +1089,7 @@ static void disas_sparc_insn(DisasContex
 {
     unsigned int insn, opc, rs1, rs2, rd;
 
-    insn = ldl_code(dc->pc);
+    insn = ldul_code(dc->pc);
     opc = GET_FIELD(insn, 0, 1);
 
     rd = GET_FIELD(insn, 2, 6);

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] RFC: reverse-endian softmmu memory accessors
  2007-10-15 16:02     ` Blue Swirl
  2007-10-15 17:45       ` Blue Swirl
@ 2007-10-15 21:06       ` J. Mayer
  1 sibling, 0 replies; 20+ messages in thread
From: J. Mayer @ 2007-10-15 21:06 UTC (permalink / raw)
  To: qemu-devel

On Mon, 2007-10-15 at 19:02 +0300, Blue Swirl wrote:
> On 10/15/07, J. Mayer <l_indien@magic.fr> wrote:
> > On Sun, 2007-10-14 at 15:59 +0300, Blue Swirl wrote:
> > > On 10/14/07, J. Mayer <l_indien@magic.fr> wrote:
> > > > Here's an updated version of the patch against current CVS.
> > > > This patches provides reverse-endian, little-endian and big-endian
> > > > memory accessors, available with and without softmmu. It also provides
> > > > an IO_MEM_REVERSE TLB flag to allow future support of per-page
> > > > endianness control, which is required by some targets CPU emulations.
> > > > Having reverse-endian memory accessors also make it possible to optimise
> > > > reverse-endian memory access when the target CPU has dedicated
> > > > instructions. For now, it includes optimisations for the PowerPC target.
> > >
> > > This breaks Sparc32 softmmu, I get a black screen. Your changes to
> > > target-sparc and hw/sun4m.c look fine, so the problem could be in IO?
> >
> > Did it worked before my commits ? I may have done something wrong during
> > the merge...
> > I will do more checks and more tests...
> 
> If I disable the IOSWAP code, black screen is gone. I think this is
> logical: the io accessors return host CPU values, therefore no byte
> swapping need to be performed.

Memory-mapped I/O access functions hopefully return data in the target
endianness.
This is the reason why there are so many #ifdef TARGET_WORDS_BIGENDIAN
in the emulated devices' memory-mapped access routines and also in the
io_read and io_write functions for 64-bit accesses.
And the emulated CPU expects data to always come in its own endianness
when doing a "load from memory", even if the access is a device one.

Your patch works as long as you use neither loads/stores with the reverse-endian accessor routines nor a TLB with the reverse-endian bit set.
On PowerPC, when using reverse-endian loads and stores, the byteswap in the I/O routines is needed for most MMIO devices (like IDE, which always returns little-endian data) to be accessible at all.
The bug you report just means there's a logical error somewhere in my code. I downloaded the Sparc test and was able to reproduce it. I'm working to find the bug.
And I finally found it. The bug is just that I did something completely stupid, defining IO_MEM_REVERSE as 3 instead of 4: it's obvious that it has to be a power of 2 to be combined with the other TLB bits. I wonder how the PowerPC case was able to run with such a huge bug... My apologies.
I'm going to do more tests with this fix, try to merge the sparc_reverse_endian patch into my code, and repost an updated patch.

-- 
J. Mayer <l_indien@magic.fr>
Never organized

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] RFC: reverse-endian softmmu memory accessors
  2007-10-15 16:02     ` Blue Swirl
@ 2007-10-15 17:45       ` Blue Swirl
  2007-10-16 20:27         ` J. Mayer
  2007-10-15 21:06       ` J. Mayer
  1 sibling, 1 reply; 20+ messages in thread
From: Blue Swirl @ 2007-10-15 17:45 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1538 bytes --]

On 10/15/07, Blue Swirl <blauwirbel@gmail.com> wrote:
> On 10/15/07, J. Mayer <l_indien@magic.fr> wrote:
> > On Sun, 2007-10-14 at 15:59 +0300, Blue Swirl wrote:
> > > On 10/14/07, J. Mayer <l_indien@magic.fr> wrote:
> > > > Here's an updated version of the patch against current CVS.
> > > > This patches provides reverse-endian, little-endian and big-endian
> > > > memory accessors, available with and without softmmu. It also provides
> > > > an IO_MEM_REVERSE TLB flag to allow future support of per-page
> > > > endianness control, which is required by some targets CPU emulations.
> > > > Having reverse-endian memory accessors also make it possible to optimise
> > > > reverse-endian memory access when the target CPU has dedicated
> > > > instructions. For now, it includes optimisations for the PowerPC target.
> > >
> > > This breaks Sparc32 softmmu, I get a black screen. Your changes to
> > > target-sparc and hw/sun4m.c look fine, so the problem could be in IO?
> >
> > Did it worked before my commits ? I may have done something wrong during
> > the merge...
> > I will do more checks and more tests...
>
> If I disable the IOSWAP code, black screen is gone. I think this is
> logical: the io accessors return host CPU values, therefore no byte
> swapping need to be performed.
>
> The attached version works for me.

This patch takes the reverse endian functions into use for Sparc.

I added hypervisor versions of the functions. This is getting a bit
ugly, time for #include magic? Physical versions could be useful too.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: sparc_reverse_endian.diff --]
[-- Type: text/x-diff; name="sparc_reverse_endian.diff", Size: 15509 bytes --]

Index: qemu/target-sparc/op_helper.c
===================================================================
--- qemu.orig/target-sparc/op_helper.c	2007-10-15 16:47:11.000000000 +0000
+++ qemu/target-sparc/op_helper.c	2007-10-15 17:34:28.000000000 +0000
@@ -649,8 +649,6 @@
     switch (asi) {
     case 0x80: // Primary
     case 0x82: // Primary no-fault
-    case 0x88: // Primary LE
-    case 0x8a: // Primary no-fault LE
         {
             switch(size) {
             case 1:
@@ -669,6 +667,26 @@
             }
         }
         break;
+    case 0x88: // Primary LE
+    case 0x8a: // Primary no-fault LE
+        {
+            switch(size) {
+            case 1:
+                ret = ldub_raw(T0);
+                break;
+            case 2:
+                ret = lduwr_raw(T0 & ~1);
+                break;
+            case 4:
+                ret = ldulr_raw(T0 & ~3);
+                break;
+            default:
+            case 8:
+                ret = ldqr_raw(T0 & ~7);
+                break;
+            }
+        }
+        break;
     case 0x81: // Secondary
     case 0x83: // Secondary no-fault
     case 0x89: // Secondary LE
@@ -679,29 +697,6 @@
         break;
     }
 
-    /* Convert from little endian */
-    switch (asi) {
-    case 0x88: // Primary LE
-    case 0x89: // Secondary LE
-    case 0x8a: // Primary no-fault LE
-    case 0x8b: // Secondary no-fault LE
-        switch(size) {
-        case 2:
-            ret = bswap16(ret);
-            break;
-        case 4:
-            ret = bswap32(ret);
-            break;
-        case 8:
-            ret = bswap64(ret);
-            break;
-        default:
-            break;
-        }
-    default:
-        break;
-    }
-
     /* Convert to signed number */
     if (sign) {
         switch(size) {
@@ -726,30 +721,8 @@
     if (asi < 0x80)
         raise_exception(TT_PRIV_ACT);
 
-    /* Convert to little endian */
-    switch (asi) {
-    case 0x88: // Primary LE
-    case 0x89: // Secondary LE
-        switch(size) {
-        case 2:
-            T0 = bswap16(T0);
-            break;
-        case 4:
-            T0 = bswap32(T0);
-            break;
-        case 8:
-            T0 = bswap64(T0);
-            break;
-        default:
-            break;
-        }
-    default:
-        break;
-    }
-
     switch(asi) {
     case 0x80: // Primary
-    case 0x88: // Primary LE
         {
             switch(size) {
             case 1:
@@ -768,6 +741,25 @@
             }
         }
         break;
+    case 0x88: // Primary LE
+        {
+            switch(size) {
+            case 1:
+                stb_raw(T0, T1);
+                break;
+            case 2:
+                stwr_raw(T0 & ~1, T1);
+                break;
+            case 4:
+                stlr_raw(T0 & ~3, T1);
+                break;
+            case 8:
+            default:
+                stqr_raw(T0 & ~7, T1);
+                break;
+            }
+        }
+        break;
     case 0x81: // Secondary
     case 0x89: // Secondary LE
         // XXX
@@ -795,11 +787,8 @@
 
     switch (asi) {
     case 0x10: // As if user primary
-    case 0x18: // As if user primary LE
     case 0x80: // Primary
     case 0x82: // Primary no-fault
-    case 0x88: // Primary LE
-    case 0x8a: // Primary no-fault LE
         if ((asi & 0x80) && (env->pstate & PS_PRIV)) {
             if (env->hpstate & HS_PRIV) {
                 switch(size) {
@@ -852,10 +841,63 @@
             }
         }
         break;
+    case 0x18: // As if user primary LE
+    case 0x88: // Primary LE
+    case 0x8a: // Primary no-fault LE
+        if ((asi & 0x80) && (env->pstate & PS_PRIV)) {
+            if (env->hpstate & HS_PRIV) {
+                switch(size) {
+                case 1:
+                    ret = ldub_hypv(T0);
+                    break;
+                case 2:
+                    ret = lduwr_hypv(T0 & ~1);
+                    break;
+                case 4:
+                    ret = ldulr_hypv(T0 & ~3);
+                    break;
+                default:
+                case 8:
+                    ret = ldqr_hypv(T0 & ~7);
+                    break;
+                }
+            } else {
+                switch(size) {
+                case 1:
+                    ret = ldub_kernel(T0);
+                    break;
+                case 2:
+                    ret = lduwr_kernel(T0 & ~1);
+                    break;
+                case 4:
+                    ret = ldulr_kernel(T0 & ~3);
+                    break;
+                default:
+                case 8:
+                    ret = ldqr_kernel(T0 & ~7);
+                    break;
+                }
+            }
+        } else {
+            switch(size) {
+            case 1:
+                ret = ldub_user(T0);
+                break;
+            case 2:
+                ret = lduwr_user(T0 & ~1);
+                break;
+            case 4:
+                ret = ldulr_user(T0 & ~3);
+                break;
+            default:
+            case 8:
+                ret = ldqr_user(T0 & ~7);
+                break;
+            }
+        }
+        break;
     case 0x14: // Bypass
     case 0x15: // Bypass, non-cacheable
-    case 0x1c: // Bypass LE
-    case 0x1d: // Bypass, non-cacheable LE
         {
             switch(size) {
             case 1:
@@ -874,6 +916,26 @@
             }
             break;
         }
+    case 0x1c: // Bypass LE
+    case 0x1d: // Bypass, non-cacheable LE
+        {
+            switch(size) {
+            case 1:
+                ret = ldub_phys(T0);
+                break;
+            case 2:
+                ret = bswap16(lduw_phys(T0 & ~1));
+                break;
+            case 4:
+                ret = bswap32(ldul_phys(T0 & ~3));
+                break;
+            default:
+            case 8:
+                ret = bswap64(ldq_phys(T0 & ~7));
+                break;
+            }
+            break;
+        }
     case 0x04: // Nucleus
     case 0x0c: // Nucleus Little Endian (LE)
     case 0x11: // As if user secondary
@@ -957,34 +1019,6 @@
         break;
     }
 
-    /* Convert from little endian */
-    switch (asi) {
-    case 0x0c: // Nucleus Little Endian (LE)
-    case 0x18: // As if user primary LE
-    case 0x19: // As if user secondary LE
-    case 0x1c: // Bypass LE
-    case 0x1d: // Bypass, non-cacheable LE
-    case 0x88: // Primary LE
-    case 0x89: // Secondary LE
-    case 0x8a: // Primary no-fault LE
-    case 0x8b: // Secondary no-fault LE
-        switch(size) {
-        case 2:
-            ret = bswap16(ret);
-            break;
-        case 4:
-            ret = bswap32(ret);
-            break;
-        case 8:
-            ret = bswap64(ret);
-            break;
-        default:
-            break;
-        }
-    default:
-        break;
-    }
-
     /* Convert to signed number */
     if (sign) {
         switch(size) {
@@ -1010,37 +1044,9 @@
         || (asi >= 0x30 && asi < 0x80) && !(env->hpstate & HS_PRIV))
         raise_exception(TT_PRIV_ACT);
 
-    /* Convert to little endian */
-    switch (asi) {
-    case 0x0c: // Nucleus Little Endian (LE)
-    case 0x18: // As if user primary LE
-    case 0x19: // As if user secondary LE
-    case 0x1c: // Bypass LE
-    case 0x1d: // Bypass, non-cacheable LE
-    case 0x88: // Primary LE
-    case 0x89: // Secondary LE
-        switch(size) {
-        case 2:
-            T0 = bswap16(T0);
-            break;
-        case 4:
-            T0 = bswap32(T0);
-            break;
-        case 8:
-            T0 = bswap64(T0);
-            break;
-        default:
-            break;
-        }
-    default:
-        break;
-    }
-
     switch(asi) {
     case 0x10: // As if user primary
-    case 0x18: // As if user primary LE
     case 0x80: // Primary
-    case 0x88: // Primary LE
         if ((asi & 0x80) && (env->pstate & PS_PRIV)) {
             if (env->hpstate & HS_PRIV) {
                 switch(size) {
@@ -1093,10 +1099,62 @@
             }
         }
         break;
+    case 0x18: // As if user primary LE
+    case 0x88: // Primary LE
+        if ((asi & 0x80) && (env->pstate & PS_PRIV)) {
+            if (env->hpstate & HS_PRIV) {
+                switch(size) {
+                case 1:
+                    stb_hypv(T0, T1);
+                    break;
+                case 2:
+                    stwr_hypv(T0 & ~1, T1);
+                    break;
+                case 4:
+                    stlr_hypv(T0 & ~3, T1);
+                    break;
+                case 8:
+                default:
+                    stqr_hypv(T0 & ~7, T1);
+                    break;
+                }
+            } else {
+                switch(size) {
+                case 1:
+                    stb_kernel(T0, T1);
+                    break;
+                case 2:
+                    stwr_kernel(T0 & ~1, T1);
+                    break;
+                case 4:
+                    stlr_kernel(T0 & ~3, T1);
+                    break;
+                case 8:
+                default:
+                    stqr_kernel(T0 & ~7, T1);
+                    break;
+                }
+            }
+        } else {
+            switch(size) {
+            case 1:
+                stb_user(T0, T1);
+                break;
+            case 2:
+                stwr_user(T0 & ~1, T1);
+                break;
+            case 4:
+                stlr_user(T0 & ~3, T1);
+                break;
+            case 8:
+            default:
+                stqr_user(T0 & ~7, T1);
+                break;
+            }
+        }
+        break;
     case 0x14: // Bypass
     case 0x15: // Bypass, non-cacheable
-    case 0x1c: // Bypass LE
-    case 0x1d: // Bypass, non-cacheable LE
         {
             switch(size) {
             case 1:
@@ -1115,6 +1173,26 @@
             }
         }
         return;
+    case 0x1c: // Bypass LE
+    case 0x1d: // Bypass, non-cacheable LE
+        {
+            switch(size) {
+            case 1:
+                stb_phys(T0, T1);
+                break;
+            case 2:
+                stw_phys(T0 & ~1, bswap16(T1));
+                break;
+            case 4:
+                stl_phys(T0 & ~3, bswap32(T1));
+                break;
+            case 8:
+            default:
+                stq_phys(T0 & ~7, bswap64(T1));
+                break;
+            }
+        }
+        return;
     case 0x04: // Nucleus
     case 0x0c: // Nucleus Little Endian (LE)
     case 0x11: // As if user secondary
Index: qemu/target-sparc/op_mem.h
===================================================================
--- qemu.orig/target-sparc/op_mem.h	2007-10-15 16:47:11.000000000 +0000
+++ qemu/target-sparc/op_mem.h	2007-10-15 17:00:16.000000000 +0000
@@ -82,16 +82,8 @@
 }
 
 #ifdef TARGET_SPARC64
-void OPPROTO glue(op_lduw, MEMSUFFIX)(void)
-{
-    T1 = (uint64_t)(glue(ldul, MEMSUFFIX)(ADDR(T0)) & 0xffffffff);
-}
-
-void OPPROTO glue(op_ldsw, MEMSUFFIX)(void)
-{
-    T1 = (int64_t)(glue(ldul, MEMSUFFIX)(ADDR(T0)) & 0xffffffff);
-}
-
+SPARC_LD_OP(lduw, ldul);
+SPARC_LD_OP(ldsw, ldsl);
 SPARC_LD_OP(ldx, ldq);
 SPARC_ST_OP(stx, stq);
 #endif
Index: qemu/cpu-all.h
===================================================================
--- qemu.orig/cpu-all.h	2007-10-15 17:10:15.000000000 +0000
+++ qemu/cpu-all.h	2007-10-15 17:27:47.000000000 +0000
@@ -1128,6 +1128,118 @@
 #define stfq_le_kernel(p, vt) stfq_kernel(p, vt)
 #endif
 
+/* native-endian */
+#define ldub_hypv(p) ldub_raw(p)
+#define ldsb_hypv(p) ldsb_raw(p)
+#define lduw_hypv(p) lduw_raw(p)
+#define ldsw_hypv(p) ldsw_raw(p)
+#define ldul_hypv(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_hypv(p) ldsl_raw(p)
+#endif
+#define ldq_hypv(p) ldq_raw(p)
+#define ldfl_hypv(p) ldfl_raw(p)
+#define ldfq_hypv(p) ldfq_raw(p)
+#define stb_hypv(p, v) stb_raw(p, v)
+#define stw_hypv(p, v) stw_raw(p, v)
+#define stl_hypv(p, v) stl_raw(p, v)
+#define stq_hypv(p, v) stq_raw(p, v)
+#define stfl_hypv(p, v) stfl_raw(p, v)
+#define stfq_hypv(p, vt) stfq_raw(p, v)
+/* reverse-endian */
+#define ldubr_hypv(p) ldub_raw(p)
+#define ldsbr_hypv(p) ldsb_raw(p)
+#define lduwr_hypv(p) lduwr_raw(p)
+#define ldswr_hypv(p) ldswr_raw(p)
+#define ldulr_hypv(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_hypv(p) ldslr_raw(p)
+#endif
+#define ldqr_hypv(p) ldqr_raw(p)
+#define ldflr_hypv(p) ldflr_raw(p)
+#define ldfqr_hypv(p) ldfqr_raw(p)
+#define stbr_hypv(p, v) stbr_raw(p, v)
+#define stwr_hypv(p, v) stwr_raw(p, v)
+#define stlr_hypv(p, v) stlr_raw(p, v)
+#define stqr_hypv(p, v) stqr_raw(p, v)
+#define stflr_hypv(p, v) stflr_raw(p, v)
+#define stfqr_hypv(p, vt) stfqr_raw(p, v)
+#if defined(TARGET_WORDS_BIGENDIAN)
+/* big-endian */
+#define ldub_be_hypv(p) ldub_hypv(p)
+#define ldsb_be_hypv(p) ldsb_hypv(p)
+#define lduw_be_hypv(p) lduw_hypv(p)
+#define ldsw_be_hypv(p) ldsw_hypv(p)
+#define ldul_be_hypv(p) ldul_hypv(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_hypv(p) ldsl_hypv(p)
+#endif
+#define ldq_be_hypv(p) ldq_hypv(p)
+#define ldfl_be_hypv(p) ldfl_hypv(p)
+#define ldfq_be_hypv(p) ldfq_hypv(p)
+#define stb_be_hypv(p, v) stb_hypv(p, v)
+#define stw_be_hypv(p, v) stw_hypv(p, v)
+#define stl_be_hypv(p, v) stl_hypv(p, v)
+#define stq_be_hypv(p, v) stq_hypv(p, v)
+#define stfl_be_hypv(p, v) stfl_hypv(p, v)
+#define stfq_be_hypv(p, vt) stfq_hypv(p, vt)
+/* little-endian */
+#define ldub_le_hypv(p) ldubr_hypv(p)
+#define ldsb_le_hypv(p) ldsbr_hypv(p)
+#define lduw_le_hypv(p) lduwr_hypv(p)
+#define ldsw_le_hypv(p) ldswr_hypv(p)
+#define ldul_le_hypv(p) ldulr_hypv(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_hypv(p) ldslr_hypv(p)
+#endif
+#define ldq_le_hypv(p) ldqr_hypv(p)
+#define ldfl_le_hypv(p) ldflr_hypv(p)
+#define ldfq_le_hypv(p) ldfqr_hypv(p)
+#define stb_le_hypv(p, v) stbr_hypv(p, v)
+#define stw_le_hypv(p, v) stwr_hypv(p, v)
+#define stl_le_hypv(p, v) stlr_hypv(p, v)
+#define stq_le_hypv(p, v) stqr_hypv(p, v)
+#define stfl_le_hypv(p, v) stflr_hypv(p, v)
+#define stfq_le_hypv(p, vt) stfqr_hypv(p, vt)
+#else
+/* big-endian */
+#define ldub_be_hypv(p) ldubr_hypv(p)
+#define ldsb_be_hypv(p) ldsbr_hypv(p)
+#define lduw_be_hypv(p) lduwr_hypv(p)
+#define ldsw_be_hypv(p) ldswr_hypv(p)
+#define ldul_be_hypv(p) ldulr_hypv(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_hypv(p) ldslr_hypv(p)
+#endif
+#define ldq_be_hypv(p) ldqr_hypv(p)
+#define ldfl_be_hypv(p) ldflr_hypv(p)
+#define ldfq_be_hypv(p) ldfqr_hypv(p)
+#define stb_be_hypv(p, v) stbr_hypv(p, v)
+#define stw_be_hypv(p, v) stwr_hypv(p, v)
+#define stl_be_hypv(p, v) stlr_hypv(p, v)
+#define stq_be_hypv(p, v) stqr_hypv(p, v)
+#define stfl_be_hypv(p, v) stflr_hypv(p, v)
+#define stfq_be_hypv(p, vt) stfqr_hypv(p, vt)
+/* little-endian */
+#define ldub_le_hypv(p) ldub_hypv(p)
+#define ldsb_le_hypv(p) ldsb_hypv(p)
+#define lduw_le_hypv(p) lduw_hypv(p)
+#define ldsw_le_hypv(p) ldsw_hypv(p)
+#define ldul_le_hypv(p) ldul_hypv(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_hypv(p) ldsl_hypv(p)
+#endif
+#define ldq_le_hypv(p) ldq_hypv(p)
+#define ldfl_le_hypv(p) ldfl_hypv(p)
+#define ldfq_le_hypv(p) ldfq_hypv(p)
+#define stb_le_hypv(p, v) stb_hypv(p, v)
+#define stw_le_hypv(p, v) stw_hypv(p, v)
+#define stl_le_hypv(p, v) stl_hypv(p, v)
+#define stq_le_hypv(p, v) stq_hypv(p, v)
+#define stfl_le_hypv(p, v) stfl_hypv(p, v)
+#define stfq_le_hypv(p, vt) stfq_hypv(p, vt)
+#endif
+
 #endif /* defined(CONFIG_USER_ONLY) */
 
 /* page related stuff */

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] RFC: reverse-endian softmmu memory accessors
  2007-10-15 12:10   ` J. Mayer
@ 2007-10-15 16:02     ` Blue Swirl
  2007-10-15 17:45       ` Blue Swirl
  2007-10-15 21:06       ` J. Mayer
  0 siblings, 2 replies; 20+ messages in thread
From: Blue Swirl @ 2007-10-15 16:02 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1232 bytes --]

On 10/15/07, J. Mayer <l_indien@magic.fr> wrote:
> On Sun, 2007-10-14 at 15:59 +0300, Blue Swirl wrote:
> > On 10/14/07, J. Mayer <l_indien@magic.fr> wrote:
> > > Here's an updated version of the patch against current CVS.
> > > This patch provides reverse-endian, little-endian and big-endian
> > > memory accessors, available with and without softmmu. It also provides
> > > an IO_MEM_REVERSE TLB flag to allow future support of per-page
> > > endianness control, which is required by some target CPU emulations.
> > > Having reverse-endian memory accessors also makes it possible to optimise
> > > reverse-endian memory access when the target CPU has dedicated
> > > instructions. For now, it includes optimisations for the PowerPC target.
> >
> > This breaks Sparc32 softmmu, I get a black screen. Your changes to
> > target-sparc and hw/sun4m.c look fine, so the problem could be in IO?
>
> Did it work before my commits? I may have done something wrong during
> the merge...
> I will do more checks and more tests...

If I disable the IOSWAP code, the black screen is gone. I think this is
logical: the I/O accessors return host CPU values, therefore no byte
swapping needs to be performed.

The attached version works for me.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: softmmu_reverse_endian.diff --]
[-- Type: text/x-diff; name="softmmu_reverse_endian.diff", Size: 167738 bytes --]

Index: cpu-all.h
===================================================================
--- cpu-all.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ cpu-all.h	2007-10-15 15:41:09.000000000 +0000
@@ -161,9 +161,9 @@
  *
  * endian is:
  * (empty): target cpu endianness or 8 bit access
- *   r    : reversed target cpu endianness (not implemented yet)
- *   be   : big endian (not implemented yet)
- *   le   : little endian (not implemented yet)
+ *   r    : reversed target cpu endianness
+ *   be   : big endian
+ *   le   : little endian
  *
  * access_type is:
  *   raw    : host memory access
@@ -215,7 +215,32 @@
 #endif
 }
 
-static inline int ldl_le_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_le_p(void *ptr)
+{
+#ifdef __powerpc__
+    int val;
+    __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+    return (uint32_t)val;
+#else
+    uint8_t *p = ptr;
+    return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
+#endif
+}
+
+static inline int64_t ldsl_le_p(void *ptr)
+{
+#ifdef __powerpc__
+    int val;
+    __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+    return (int32_t)val;
+#else
+    uint8_t *p = ptr;
+    return (int32_t)(p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24));
+#endif
+}
+#else
+static inline int ldul_le_p(void *ptr)
 {
 #ifdef __powerpc__
     int val;
@@ -226,13 +251,14 @@
     return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
 #endif
 }
+#endif
 
 static inline uint64_t ldq_le_p(void *ptr)
 {
     uint8_t *p = ptr;
     uint32_t v1, v2;
-    v1 = ldl_le_p(p);
-    v2 = ldl_le_p(p + 4);
+    v1 = ldul_le_p(p);
+    v2 = ldul_le_p(p + 4);
     return v1 | ((uint64_t)v2 << 32);
 }
 
@@ -275,7 +301,7 @@
         float32 f;
         uint32_t i;
     } u;
-    u.i = ldl_le_p(ptr);
+    u.i = ldul_le_p(ptr);
     return u.f;
 }
 
@@ -292,8 +318,8 @@
 static inline float64 ldfq_le_p(void *ptr)
 {
     CPU_DoubleU u;
-    u.l.lower = ldl_le_p(ptr);
-    u.l.upper = ldl_le_p(ptr + 4);
+    u.l.lower = ldul_le_p(ptr);
+    u.l.upper = ldul_le_p(ptr + 4);
     return u.d;
 }
 
@@ -317,10 +343,22 @@
     return *(int16_t *)ptr;
 }
 
-static inline int ldl_le_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_le_p(void *ptr)
+{
+    return *(uint32_t *)ptr;
+}
+
+static inline int64_t ldsl_le_p(void *ptr)
+{
+    return *(int32_t *)ptr;
+}
+#else
+static inline int ldul_le_p(void *ptr)
 {
     return *(uint32_t *)ptr;
 }
+#endif
 
 static inline uint64_t ldq_le_p(void *ptr)
 {
@@ -397,7 +435,38 @@
 #endif
 }
 
-static inline int ldl_be_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_be_p(void *ptr)
+{
+#if defined(__i386__) || defined(__x86_64__)
+    int val;
+    asm volatile ("movl %1, %0\n"
+                  "bswap %0\n"
+                  : "=r" (val)
+                  : "m" (*(uint32_t *)ptr));
+    return (uint32_t)val;
+#else
+    uint8_t *b = (uint8_t *) ptr;
+    return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
+#endif
+}
+
+static inline int64_t ldsl_be_p(void *ptr)
+{
+#if defined(__i386__) || defined(__x86_64__)
+    int val;
+    asm volatile ("movl %1, %0\n"
+                  "bswap %0\n"
+                  : "=r" (val)
+                  : "m" (*(uint32_t *)ptr));
+    return (int32_t)val;
+#else
+    uint8_t *b = (uint8_t *) ptr;
+    return (int32_t)((b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]);
+#endif
+}
+#else
+static inline int ldul_be_p(void *ptr)
 {
 #if defined(__i386__) || defined(__x86_64__)
     int val;
@@ -411,12 +480,13 @@
     return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
 #endif
 }
+#endif
 
 static inline uint64_t ldq_be_p(void *ptr)
 {
     uint32_t a,b;
-    a = ldl_be_p(ptr);
-    b = ldl_be_p(ptr+4);
+    a = ldul_be_p(ptr);
+    b = ldul_be_p(ptr+4);
     return (((uint64_t)a<<32)|b);
 }
 
@@ -464,7 +534,7 @@
         float32 f;
         uint32_t i;
     } u;
-    u.i = ldl_be_p(ptr);
+    u.i = ldul_be_p(ptr);
     return u.f;
 }
 
@@ -481,8 +551,8 @@
 static inline float64 ldfq_be_p(void *ptr)
 {
     CPU_DoubleU u;
-    u.l.upper = ldl_be_p(ptr);
-    u.l.lower = ldl_be_p(ptr + 4);
+    u.l.upper = ldul_be_p(ptr);
+    u.l.lower = ldul_be_p(ptr + 4);
     return u.d;
 }
 
@@ -506,11 +576,23 @@
     return *(int16_t *)ptr;
 }
 
-static inline int ldl_be_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_be_p(void *ptr)
 {
     return *(uint32_t *)ptr;
 }
 
+static inline int64_t ldsl_be_p(void *ptr)
+{
+    return *(int32_t *)ptr;
+}
+#else
+static inline int ldul_be_p(void *ptr)
+{
+    return *(uint32_t *)ptr;
+}
+#endif
+
 static inline uint64_t ldq_be_p(void *ptr)
 {
     return *(uint64_t *)ptr;
@@ -557,9 +639,13 @@
 
 /* target CPU memory access functions */
 #if defined(TARGET_WORDS_BIGENDIAN)
+/* native-endian */
 #define lduw_p(p) lduw_be_p(p)
 #define ldsw_p(p) ldsw_be_p(p)
-#define ldl_p(p) ldl_be_p(p)
+#define ldul_p(p) ldul_be_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_p(p) ldsl_be_p(p)
+#endif
 #define ldq_p(p) ldq_be_p(p)
 #define ldfl_p(p) ldfl_be_p(p)
 #define ldfq_p(p) ldfq_be_p(p)
@@ -568,10 +654,29 @@
 #define stq_p(p, v) stq_be_p(p, v)
 #define stfl_p(p, v) stfl_be_p(p, v)
 #define stfq_p(p, v) stfq_be_p(p, v)
+/* reverse-endian */
+#define lduwr_p(p) lduw_le_p(p)
+#define ldswr_p(p) ldsw_le_p(p)
+#define ldulr_p(p) ldul_le_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_p(p) ldsl_le_p(p)
+#endif
+#define ldqr_p(p) ldq_le_p(p)
+#define ldflr_p(p) ldfl_le_p(p)
+#define ldfqr_p(p) ldfq_le_p(p)
+#define stwr_p(p, v) stw_le_p(p, v)
+#define stlr_p(p, v) stl_le_p(p, v)
+#define stqr_p(p, v) stq_le_p(p, v)
+#define stflr_p(p, v) stfl_le_p(p, v)
+#define stfqr_p(p, v) stfq_le_p(p, v)
 #else
+/* native-endian */
 #define lduw_p(p) lduw_le_p(p)
 #define ldsw_p(p) ldsw_le_p(p)
-#define ldl_p(p) ldl_le_p(p)
+#define ldul_p(p) ldul_le_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_p(p) ldsl_le_p(p)
+#endif
 #define ldq_p(p) ldq_le_p(p)
 #define ldfl_p(p) ldfl_le_p(p)
 #define ldfq_p(p) ldfq_le_p(p)
@@ -580,6 +685,21 @@
 #define stq_p(p, v) stq_le_p(p, v)
 #define stfl_p(p, v) stfl_le_p(p, v)
 #define stfq_p(p, v) stfq_le_p(p, v)
+/* reverse-endian */
+#define lduwr_p(p) lduw_be_p(p)
+#define ldswr_p(p) ldsw_be_p(p)
+#define ldulr_p(p) ldul_be_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_p(p) ldsl_be_p(p)
+#endif
+#define ldqr_p(p) ldq_be_p(p)
+#define ldflr_p(p) ldfl_be_p(p)
+#define ldfqr_p(p) ldfq_be_p(p)
+#define stwr_p(p, v) stw_be_p(p, v)
+#define stlr_p(p, v) stl_be_p(p, v)
+#define stqr_p(p, v) stq_be_p(p, v)
+#define stflr_p(p, v) stfl_be_p(p, v)
+#define stfqr_p(p, v) stfq_be_p(p, v)
 #endif
 
 /* MMU memory access macros */
@@ -605,11 +725,15 @@
 #define laddr(x) (uint8_t *)(long)(x)
 #endif
 
+/* native-endian */
 #define ldub_raw(p) ldub_p(laddr((p)))
 #define ldsb_raw(p) ldsb_p(laddr((p)))
 #define lduw_raw(p) lduw_p(laddr((p)))
 #define ldsw_raw(p) ldsw_p(laddr((p)))
-#define ldl_raw(p) ldl_p(laddr((p)))
+#define ldul_raw(p) ldul_p(laddr((p)))
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_raw(p) ldsl_p(laddr((p)))
+#endif
 #define ldq_raw(p) ldq_p(laddr((p)))
 #define ldfl_raw(p) ldfl_p(laddr((p)))
 #define ldfq_raw(p) ldfq_p(laddr((p)))
@@ -619,16 +743,112 @@
 #define stq_raw(p, v) stq_p(saddr((p)), v)
 #define stfl_raw(p, v) stfl_p(saddr((p)), v)
 #define stfq_raw(p, v) stfq_p(saddr((p)), v)
-
+/* reverse endian */
+#define ldubr_raw(p) ldub_p(laddr((p)))
+#define ldsbr_raw(p) ldsb_p(laddr((p)))
+#define lduwr_raw(p) lduwr_p(laddr((p)))
+#define ldswr_raw(p) ldswr_p(laddr((p)))
+#define ldulr_raw(p) ldulr_p(laddr((p)))
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_raw(p) ldslr_p(laddr((p)))
+#endif
+#define ldqr_raw(p) ldqr_p(laddr((p)))
+#define ldflr_raw(p) ldflr_p(laddr((p)))
+#define ldfqr_raw(p) ldfqr_p(laddr((p)))
+#define stbr_raw(p, v) stb_p(saddr((p)), v)
+#define stwr_raw(p, v) stwr_p(saddr((p)), v)
+#define stlr_raw(p, v) stlr_p(saddr((p)), v)
+#define stqr_raw(p, v) stqr_p(saddr((p)), v)
+#define stflr_raw(p, v) stflr_p(saddr((p)), v)
+#define stfqr_raw(p, v) stfqr_p(saddr((p)), v)
+#if defined(TARGET_WORDS_BIGENDIAN)
+/* big-endian */
+#define ldub_be_raw(p) ldub_raw(p)
+#define ldsb_be_raw(p) ldsb_raw(p)
+#define lduw_be_raw(p) lduw_raw(p)
+#define ldsw_be_raw(p) ldsw_raw(p)
+#define ldul_be_raw(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_raw(p) ldsl_raw(p)
+#endif
+#define ldq_be_raw(p) ldq_raw(p)
+#define ldfl_be_raw(p) ldfl_raw(p)
+#define ldfq_be_raw(p) ldfq_raw(p)
+#define stb_be_raw(p, v) stb_raw(p, v)
+#define stw_be_raw(p, v) stw_raw(p, v)
+#define stl_be_raw(p, v) stl_raw(p, v)
+#define stq_be_raw(p, v) stq_raw(p, v)
+#define stfl_be_raw(p, v) stfl_raw(p, v)
+#define stfq_be_raw(p, v) stfq_raw(p, v)
+/* little-endian */
+#define ldub_le_raw(p) ldubr_raw(p)
+#define ldsb_le_raw(p) ldsbr_raw(p)
+#define lduw_le_raw(p) lduwr_raw(p)
+#define ldsw_le_raw(p) ldswr_raw(p)
+#define ldul_le_raw(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_raw(p) ldslr_raw(p)
+#endif
+#define ldq_le_raw(p) ldqr_raw(p)
+#define ldfl_le_raw(p) ldflr_raw(p)
+#define ldfq_le_raw(p) ldfqr_raw(p)
+#define stb_le_raw(p, v) stbr_raw(p, v)
+#define stw_le_raw(p, v) stwr_raw(p, v)
+#define stl_le_raw(p, v) stlr_raw(p, v)
+#define stq_le_raw(p, v) stqr_raw(p, v)
+#define stfl_le_raw(p, v) stflr_raw(p, v)
+#define stfq_le_raw(p, v) stfqr_raw(p, v)
+#else
+/* big-endian */
+#define ldub_be_raw(p) ldubr_raw(p)
+#define ldsb_be_raw(p) ldsbr_raw(p)
+#define lduw_be_raw(p) lduwr_raw(p)
+#define ldsw_be_raw(p) ldswr_raw(p)
+#define ldul_be_raw(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_raw(p) ldslr_raw(p)
+#endif
+#define ldq_be_raw(p) ldqr_raw(p)
+#define ldfl_be_raw(p) ldflr_raw(p)
+#define ldfq_be_raw(p) ldfqr_raw(p)
+#define stb_be_raw(p, v) stbr_raw(p, v)
+#define stw_be_raw(p, v) stwr_raw(p, v)
+#define stl_be_raw(p, v) stlr_raw(p, v)
+#define stq_be_raw(p, v) stqr_raw(p, v)
+#define stfl_be_raw(p, v) stflr_raw(p, v)
+#define stfq_be_raw(p, v) stfqr_raw(p, v)
+/* little-endian */
+#define ldub_le_raw(p) ldub_raw(p)
+#define ldsb_le_raw(p) ldsb_raw(p)
+#define lduw_le_raw(p) lduw_raw(p)
+#define ldsw_le_raw(p) ldsw_raw(p)
+#define ldul_le_raw(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_raw(p) ldsl_raw(p)
+#endif
+#define ldq_le_raw(p) ldq_raw(p)
+#define ldfl_le_raw(p) ldfl_raw(p)
+#define ldfq_le_raw(p) ldfq_raw(p)
+#define stb_le_raw(p, v) stb_raw(p, v)
+#define stw_le_raw(p, v) stw_raw(p, v)
+#define stl_le_raw(p, v) stl_raw(p, v)
+#define stq_le_raw(p, v) stq_raw(p, v)
+#define stfl_le_raw(p, v) stfl_raw(p, v)
+#define stfq_le_raw(p, v) stfq_raw(p, v)
+#endif
 
 #if defined(CONFIG_USER_ONLY)
 
 /* if user mode, no other memory access functions */
+/* native-endian */
 #define ldub(p) ldub_raw(p)
 #define ldsb(p) ldsb_raw(p)
 #define lduw(p) lduw_raw(p)
 #define ldsw(p) ldsw_raw(p)
-#define ldl(p) ldl_raw(p)
+#define ldul(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl(p) ldsl_raw(p)
+#endif
 #define ldq(p) ldq_raw(p)
 #define ldfl(p) ldfl_raw(p)
 #define ldfq(p) ldfq_raw(p)
@@ -638,19 +858,173 @@
 #define stq(p, v) stq_raw(p, v)
 #define stfl(p, v) stfl_raw(p, v)
 #define stfq(p, v) stfq_raw(p, v)
+/* reverse-endian */
+#define ldubr(p) ldub_raw(p)
+#define ldsbr(p) ldsb_raw(p)
+#define lduwr(p) lduwr_raw(p)
+#define ldswr(p) ldswr_raw(p)
+#define ldulr(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr(p) ldslr_raw(p)
+#endif
+#define ldqr(p) ldqr_raw(p)
+#define ldflr(p) ldflr_raw(p)
+#define ldfqr(p) ldfqr_raw(p)
+#define stbr(p, v) stb_raw(p, v)
+#define stwr(p, v) stwr_raw(p, v)
+#define stlr(p, v) stlr_raw(p, v)
+#define stqr(p, v) stqr_raw(p, v)
+#define stflr(p, v) stflr_raw(p, v)
+#define stfqr(p, v) stfqr_raw(p, v)
+#if defined(TARGET_WORDS_BIGENDIAN)
+/* big-endian */
+#define ldub_be(p) ldub(p)
+#define ldsb_be(p) ldsb(p)
+#define lduw_be(p) lduw(p)
+#define ldsw_be(p) ldsw(p)
+#define ldul_be(p) ldul(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be(p) ldsl(p)
+#endif
+#define ldq_be(p) ldq(p)
+#define ldfl_be(p) ldfl(p)
+#define ldfq_be(p) ldfq(p)
+#define stb_be(p, v) stb(p, v)
+#define stw_be(p, v) stw(p, v)
+#define stl_be(p, v) stl(p, v)
+#define stq_be(p, v) stq(p, v)
+#define stfl_be(p, v) stfl(p, v)
+#define stfq_be(p, v) stfq(p, v)
+/* little-endian */
+#define ldub_le(p) ldubr(p)
+#define ldsb_le(p) ldsbr(p)
+#define lduw_le(p) lduwr(p)
+#define ldsw_le(p) ldswr(p)
+#define ldul_le(p) ldulr(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le(p) ldslr(p)
+#endif
+#define ldq_le(p) ldqr(p)
+#define ldfl_le(p) ldflr(p)
+#define ldfq_le(p) ldfqr(p)
+#define stb_le(p, v) stbr(p, v)
+#define stw_le(p, v) stwr(p, v)
+#define stl_le(p, v) stlr(p, v)
+#define stq_le(p, v) stqr(p, v)
+#define stfl_le(p, v) stflr(p, v)
+#define stfq_le(p, v) stfqr(p, v)
+#else
+/* big-endian */
+#define ldub_be(p) ldubr(p)
+#define ldsb_be(p) ldsbr(p)
+#define lduw_be(p) lduwr(p)
+#define ldsw_be(p) ldswr(p)
+#define ldul_be(p) ldulr(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be(p) ldslr(p)
+#endif
+#define ldq_be(p) ldqr(p)
+#define ldfl_be(p) ldflr(p)
+#define ldfq_be(p) ldfqr(p)
+#define stb_be(p, v) stbr(p, v)
+#define stw_be(p, v) stwr(p, v)
+#define stl_be(p, v) stlr(p, v)
+#define stq_be(p, v) stqr(p, v)
+#define stfl_be(p, v) stflr(p, v)
+#define stfq_be(p, v) stfqr(p, v)
+/* little-endian */
+#define ldub_le(p) ldub(p)
+#define ldsb_le(p) ldsb(p)
+#define lduw_le(p) lduw(p)
+#define ldsw_le(p) ldsw(p)
+#define ldul_le(p) ldul(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le(p) ldsl(p)
+#endif
+#define ldq_le(p) ldq(p)
+#define ldfl_le(p) ldfl(p)
+#define ldfq_le(p) ldfq(p)
+#define stb_le(p, v) stb(p, v)
+#define stw_le(p, v) stw(p, v)
+#define stl_le(p, v) stl(p, v)
+#define stq_le(p, v) stq(p, v)
+#define stfl_le(p, v) stfl(p, v)
+#define stfq_le(p, v) stfq(p, v)
+#endif
 
+/* native-endian */
 #define ldub_code(p) ldub_raw(p)
 #define ldsb_code(p) ldsb_raw(p)
 #define lduw_code(p) lduw_raw(p)
 #define ldsw_code(p) ldsw_raw(p)
-#define ldl_code(p) ldl_raw(p)
+#define ldul_code(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_code(p) ldsl_raw(p)
+#endif
 #define ldq_code(p) ldq_raw(p)
+/* reverse-endian */
+#define ldubr_code(p) ldub_raw(p)
+#define ldsbr_code(p) ldsb_raw(p)
+#define lduwr_code(p) lduwr_raw(p)
+#define ldswr_code(p) ldswr_raw(p)
+#define ldulr_code(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_code(p) ldslr_raw(p)
+#endif
+#define ldqr_code(p) ldqr_raw(p)
+#if defined(TARGET_WORDS_BIGENDIAN)
+/* big-endian */
+#define ldub_be_code(p) ldub_code(p)
+#define ldsb_be_code(p) ldsb_code(p)
+#define lduw_be_code(p) lduw_code(p)
+#define ldsw_be_code(p) ldsw_code(p)
+#define ldul_be_code(p) ldul_code(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_code(p) ldsl_code(p)
+#endif
+#define ldq_be_code(p) ldq_code(p)
+/* little-endian */
+#define ldub_le_code(p) ldubr_code(p)
+#define ldsb_le_code(p) ldsbr_code(p)
+#define lduw_le_code(p) lduwr_code(p)
+#define ldsw_le_code(p) ldswr_code(p)
+#define ldul_le_code(p) ldulr_code(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_code(p) ldslr_code(p)
+#endif
+#define ldq_le_code(p) ldqr_code(p)
+#else
+/* big-endian */
+#define ldub_be_code(p) ldubr_code(p)
+#define ldsb_be_code(p) ldsbr_code(p)
+#define lduw_be_code(p) lduwr_code(p)
+#define ldsw_be_code(p) ldswr_code(p)
+#define ldul_be_code(p) ldulr_code(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_code(p) ldslr_code(p)
+#endif
+#define ldq_be_code(p) ldqr_code(p)
+/* little-endian */
+#define ldub_le_code(p) ldub_code(p)
+#define ldsb_le_code(p) ldsb_code(p)
+#define lduw_le_code(p) lduw_code(p)
+#define ldsw_le_code(p) ldsw_code(p)
+#define ldul_le_code(p) ldul_code(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_code(p) ldsl_code(p)
+#endif
+#define ldq_le_code(p) ldq_code(p)
+#endif
 
+/* native-endian */
 #define ldub_kernel(p) ldub_raw(p)
 #define ldsb_kernel(p) ldsb_raw(p)
 #define lduw_kernel(p) lduw_raw(p)
 #define ldsw_kernel(p) ldsw_raw(p)
-#define ldl_kernel(p) ldl_raw(p)
+#define ldul_kernel(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_kernel(p) ldsl_raw(p)
+#endif
 #define ldq_kernel(p) ldq_raw(p)
 #define ldfl_kernel(p) ldfl_raw(p)
 #define ldfq_kernel(p) ldfq_raw(p)
@@ -660,6 +1034,99 @@
 #define stq_kernel(p, v) stq_raw(p, v)
 #define stfl_kernel(p, v) stfl_raw(p, v)
 #define stfq_kernel(p, vt) stfq_raw(p, v)
+/* reverse-endian */
+#define ldubr_kernel(p) ldub_raw(p)
+#define ldsbr_kernel(p) ldsb_raw(p)
+#define lduwr_kernel(p) lduwr_raw(p)
+#define ldswr_kernel(p) ldswr_raw(p)
+#define ldulr_kernel(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_kernel(p) ldslr_raw(p)
+#endif
+#define ldqr_kernel(p) ldqr_raw(p)
+#define ldflr_kernel(p) ldflr_raw(p)
+#define ldfqr_kernel(p) ldfqr_raw(p)
+#define stbr_kernel(p, v) stbr_raw(p, v)
+#define stwr_kernel(p, v) stwr_raw(p, v)
+#define stlr_kernel(p, v) stlr_raw(p, v)
+#define stqr_kernel(p, v) stqr_raw(p, v)
+#define stflr_kernel(p, v) stflr_raw(p, v)
+#define stfqr_kernel(p, vt) stfqr_raw(p, v)
+#if defined(TARGET_WORDS_BIGENDIAN)
+/* big-endian */
+#define ldub_be_kernel(p) ldub_kernel(p)
+#define ldsb_be_kernel(p) ldsb_kernel(p)
+#define lduw_be_kernel(p) lduw_kernel(p)
+#define ldsw_be_kernel(p) ldsw_kernel(p)
+#define ldul_be_kernel(p) ldul_kernel(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_kernel(p) ldsl_kernel(p)
+#endif
+#define ldq_be_kernel(p) ldq_kernel(p)
+#define ldfl_be_kernel(p) ldfl_kernel(p)
+#define ldfq_be_kernel(p) ldfq_kernel(p)
+#define stb_be_kernel(p, v) stb_kernel(p, v)
+#define stw_be_kernel(p, v) stw_kernel(p, v)
+#define stl_be_kernel(p, v) stl_kernel(p, v)
+#define stq_be_kernel(p, v) stq_kernel(p, v)
+#define stfl_be_kernel(p, v) stfl_kernel(p, v)
+#define stfq_be_kernel(p, vt) stfq_kernel(p, vt)
+/* little-endian */
+#define ldub_le_kernel(p) ldubr_kernel(p)
+#define ldsb_le_kernel(p) ldsbr_kernel(p)
+#define lduw_le_kernel(p) lduwr_kernel(p)
+#define ldsw_le_kernel(p) ldswr_kernel(p)
+#define ldul_le_kernel(p) ldulr_kernel(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_kernel(p) ldslr_kernel(p)
+#endif
+#define ldq_le_kernel(p) ldqr_kernel(p)
+#define ldfl_le_kernel(p) ldflr_kernel(p)
+#define ldfq_le_kernel(p) ldfqr_kernel(p)
+#define stb_le_kernel(p, v) stbr_kernel(p, v)
+#define stw_le_kernel(p, v) stwr_kernel(p, v)
+#define stl_le_kernel(p, v) stlr_kernel(p, v)
+#define stq_le_kernel(p, v) stqr_kernel(p, v)
+#define stfl_le_kernel(p, v) stflr_kernel(p, v)
+#define stfq_le_kernel(p, vt) stfqr_kernel(p, vt)
+#else
+/* big-endian */
+#define ldub_be_kernel(p) ldubr_kernel(p)
+#define ldsb_be_kernel(p) ldsbr_kernel(p)
+#define lduw_be_kernel(p) lduwr_kernel(p)
+#define ldsw_be_kernel(p) ldswr_kernel(p)
+#define ldul_be_kernel(p) ldulr_kernel(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_kernel(p) ldslr_kernel(p)
+#endif
+#define ldq_be_kernel(p) ldqr_kernel(p)
+#define ldfl_be_kernel(p) ldflr_kernel(p)
+#define ldfq_be_kernel(p) ldfqr_kernel(p)
+#define stb_be_kernel(p, v) stbr_kernel(p, v)
+#define stw_be_kernel(p, v) stwr_kernel(p, v)
+#define stl_be_kernel(p, v) stlr_kernel(p, v)
+#define stq_be_kernel(p, v) stqr_kernel(p, v)
+#define stfl_be_kernel(p, v) stflr_kernel(p, v)
+#define stfq_be_kernel(p, vt) stfqr_kernel(p, vt)
+/* little-endian */
+#define ldub_le_kernel(p) ldub_kernel(p)
+#define ldsb_le_kernel(p) ldsb_kernel(p)
+#define lduw_le_kernel(p) lduw_kernel(p)
+#define ldsw_le_kernel(p) ldsw_kernel(p)
+#define ldul_le_kernel(p) ldul_kernel(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_kernel(p) ldsl_kernel(p)
+#endif
+#define ldq_le_kernel(p) ldq_kernel(p)
+#define ldfl_le_kernel(p) ldfl_kernel(p)
+#define ldfq_le_kernel(p) ldfq_kernel(p)
+#define stb_le_kernel(p, v) stb_kernel(p, v)
+#define stw_le_kernel(p, v) stw_kernel(p, v)
+#define stl_le_kernel(p, v) stl_kernel(p, v)
+#define stq_le_kernel(p, v) stq_kernel(p, v)
+#define stfl_le_kernel(p, v) stfl_kernel(p, v)
+#define stfq_le_kernel(p, vt) stfq_kernel(p, vt)
+#endif
 
 #endif /* defined(CONFIG_USER_ONLY) */
 
@@ -790,6 +1257,8 @@
    the physical address */
 #define IO_MEM_ROMD        (1)
 #define IO_MEM_SUBPAGE     (2)
+/* On some target CPUs, endianness is stored in page tables */
+#define IO_MEM_REVERSE     (3)
 
 typedef void CPUWriteMemoryFunc(void *opaque, target_phys_addr_t addr, uint32_t value);
 typedef uint32_t CPUReadMemoryFunc(void *opaque, target_phys_addr_t addr);
@@ -821,7 +1290,7 @@
 }
 uint32_t ldub_phys(target_phys_addr_t addr);
 uint32_t lduw_phys(target_phys_addr_t addr);
-uint32_t ldl_phys(target_phys_addr_t addr);
+uint32_t ldul_phys(target_phys_addr_t addr);
 uint64_t ldq_phys(target_phys_addr_t addr);
 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val);
 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val);
Index: cpu-exec.c
===================================================================
--- cpu-exec.c.orig	2007-10-14 16:44:20.000000000 +0000
+++ cpu-exec.c	2007-10-15 15:41:09.000000000 +0000
@@ -436,12 +436,12 @@
                          /* FIXME: this should respect TPR */
                          env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
                          svm_check_intercept(SVM_EXIT_VINTR);
-                         intno = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector));
+                         intno = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector));
                          if (loglevel & CPU_LOG_TB_IN_ASM)
                              fprintf(logfile, "Servicing virtual hardware INT=0x%02x\n", intno);
 	                 do_interrupt(intno, 0, 0, -1, 1);
                          stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl),
-                                  ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)) & ~V_IRQ_MASK);
+                                  ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)) & ~V_IRQ_MASK);
 #if defined(__sparc__) && !defined(HOST_SOLARIS)
                          tmp_T0 = 0;
 #else
Index: exec-all.h
===================================================================
--- exec-all.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ exec-all.h	2007-10-15 15:41:09.000000000 +0000
@@ -569,6 +569,7 @@
 #define MEMSUFFIX _code
 #define env cpu_single_env
 
+/* native-endian */
 #define DATA_SIZE 1
 #include "softmmu_header.h"
 
@@ -581,6 +582,21 @@
 #define DATA_SIZE 8
 #include "softmmu_header.h"
 
+/* reverse-endian */
+#define REVERSE_ENDIAN
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+#undef REVERSE_ENDIAN
+
 #undef ACCESS_TYPE
 #undef MEMSUFFIX
 #undef env
Index: exec.c
===================================================================
--- exec.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ exec.c	2007-10-15 15:41:09.000000000 +0000
@@ -2202,7 +2202,7 @@
 
 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
 {
-    return ldl_phys(addr);
+    return ldul_phys(addr);
 }
 
 /* Generate a debug exception if a watchpoint has been hit.
@@ -2507,7 +2507,7 @@
     uint8_t *ptr;
     uint32_t val;
     target_phys_addr_t page;
-    unsigned long pd;
+    unsigned long pd, addr1;
     PhysPageDesc *p;
 
     while (len > 0) {
@@ -2524,31 +2524,54 @@
 
         if (is_write) {
             if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
-                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
-                /* XXX: could force cpu_single_env to NULL to avoid
-                   potential bugs */
-                if (l >= 4 && ((addr & 3) == 0)) {
-                    /* 32 bit write access */
-                    val = ldl_p(buf);
-                    io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
-                    l = 4;
-                } else if (l >= 2 && ((addr & 1) == 0)) {
-                    /* 16 bit write access */
-                    val = lduw_p(buf);
-                    io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
-                    l = 2;
+                if (pd & IO_MEM_REVERSE) {
+                    /* Specific case for reverse endian page write */
+                    addr1 = (pd & TARGET_PAGE_MASK) +
+                        (addr & ~TARGET_PAGE_MASK);
+                    ptr = phys_ram_base + addr1;
+                    for (; l >= 4; l -= 4) {
+                        stlr_p(ptr, *(uint32_t *)buf);
+                        ptr += 4;
+                        buf += 4;
+                    }
+                    for (; l >= 2; l -= 2) {
+                        stwr_p(ptr, *(uint16_t *)buf);
+                        ptr += 2;
+                        buf += 2;
+                    }
+                    if (l >= 1)
+                        *ptr = *buf;
+                    goto invalidate_code;
                 } else {
-                    /* 8 bit write access */
-                    val = ldub_p(buf);
-                    io_mem_write[io_index][0](io_mem_opaque[io_index], addr, val);
-                    l = 1;
+                    io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+                    /* XXX: could force cpu_single_env to NULL to avoid
+                       potential bugs */
+                    if (l >= 4 && ((addr & 3) == 0)) {
+                        /* 32 bit write access */
+                        val = ldul_p(buf);
+                        io_mem_write[io_index][2](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 4;
+                    } else if (l >= 2 && ((addr & 1) == 0)) {
+                        /* 16 bit write access */
+                        val = lduw_p(buf);
+                        io_mem_write[io_index][1](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 2;
+                    } else {
+                        /* 8 bit write access */
+                        val = ldub_p(buf);
+                        io_mem_write[io_index][0](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 1;
+                    }
                 }
             } else {
-                unsigned long addr1;
                 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
                 /* RAM case */
                 ptr = phys_ram_base + addr1;
                 memcpy(ptr, buf, l);
+            invalidate_code:
                 if (!cpu_physical_memory_is_dirty(addr1)) {
                     /* invalidate code */
                     tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
@@ -2560,23 +2583,45 @@
         } else {
             if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
                 !(pd & IO_MEM_ROMD)) {
-                /* I/O case */
-                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
-                if (l >= 4 && ((addr & 3) == 0)) {
-                    /* 32 bit read access */
-                    val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
-                    stl_p(buf, val);
-                    l = 4;
-                } else if (l >= 2 && ((addr & 1) == 0)) {
-                    /* 16 bit read access */
-                    val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
-                    stw_p(buf, val);
-                    l = 2;
+                if (pd & IO_MEM_REVERSE) {
+                    /* Specific case for reverse endian page read */
+                    addr1 = (pd & TARGET_PAGE_MASK) +
+                        (addr & ~TARGET_PAGE_MASK);
+                    ptr = phys_ram_base + addr1;
+                    for (; l >= 4; l -= 4) {
+                        *(uint32_t *)buf = ldulr_p(ptr);
+                        ptr += 4;
+                        buf += 4;
+                    }
+                    for (; l >= 2; l -= 2) {
+                        *(uint16_t *)buf = lduwr_p(ptr);
+                        ptr += 2;
+                        buf += 2;
+                    }
+                    if (l >= 1)
+                        *buf = *ptr;
                 } else {
-                    /* 8 bit read access */
-                    val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr);
-                    stb_p(buf, val);
-                    l = 1;
+                    /* I/O case */
+                    io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+                    if (l >= 4 && ((addr & 3) == 0)) {
+                        /* 32 bit read access */
+                        val = io_mem_read[io_index][2](io_mem_opaque[io_index],
+                                                       addr);
+                        stl_p(buf, val);
+                        l = 4;
+                    } else if (l >= 2 && ((addr & 1) == 0)) {
+                        /* 16 bit read access */
+                        val = io_mem_read[io_index][1](io_mem_opaque[io_index],
+                                                       addr);
+                        stw_p(buf, val);
+                        l = 2;
+                    } else {
+                        /* 8 bit read access */
+                        val = io_mem_read[io_index][0](io_mem_opaque[io_index],
+                                                       addr);
+                        stb_p(buf, val);
+                        l = 1;
+                    }
                 }
             } else {
                 /* RAM case */
@@ -2632,7 +2677,7 @@
 
 
 /* warning: addr must be aligned */
-uint32_t ldl_phys(target_phys_addr_t addr)
+uint32_t ldul_phys(target_phys_addr_t addr)
 {
     int io_index;
     uint8_t *ptr;
@@ -2656,7 +2701,7 @@
         /* RAM case */
         ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) +
             (addr & ~TARGET_PAGE_MASK);
-        val = ldl_p(ptr);
+        val = ldul_p(ptr);
     }
     return val;
 }
@@ -2907,6 +2952,21 @@
 #define env cpu_single_env
 #define SOFTMMU_CODE_ACCESS
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -2918,6 +2978,7 @@
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 #undef env
 
Index: monitor.c
===================================================================
--- monitor.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ monitor.c	2007-10-15 15:41:09.000000000 +0000
@@ -595,7 +595,7 @@
                 v = lduw_raw(buf + i);
                 break;
             case 4:
-                v = (uint32_t)ldl_raw(buf + i);
+                v = (uint32_t)ldul_raw(buf + i);
                 break;
             case 8:
                 v = ldq_raw(buf + i);
Index: softmmu_exec.h
===================================================================
--- softmmu_exec.h.orig	2007-10-14 13:00:06.000000000 +0000
+++ softmmu_exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -1,13 +1,7 @@
 /* Common softmmu definitions and inline routines.  */
 
-/* XXX: find something cleaner.
- * Furthermore, this is false for 64 bits targets
- */
-#define ldul_user       ldl_user
-#define ldul_kernel     ldl_kernel
-#define ldul_hypv       ldl_hypv
-#define ldul_executive  ldl_executive
-#define ldul_supervisor ldl_supervisor
+#define lduq_user ldq_user
+#define lduq_kernel ldq_kernel
 
 #define ACCESS_TYPE 0
 #define MEMSUFFIX MMU_MODE0_SUFFIX
@@ -104,7 +98,10 @@
 #define ldsb(p) ldsb_data(p)
 #define lduw(p) lduw_data(p)
 #define ldsw(p) ldsw_data(p)
-#define ldl(p) ldl_data(p)
+#define ldul(p) ldul_data(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl(p) ldsl_data(p)
+#endif
 #define ldq(p) ldq_data(p)
 
 #define stb(p, v) stb_data(p, v)
Index: softmmu_header.h
===================================================================
--- softmmu_header.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ softmmu_header.h	2007-10-15 15:41:10.000000000 +0000
@@ -17,27 +17,86 @@
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
+
+#if !defined(REVERSE_ENDIAN)
+/* native-endian */
+#if defined(TARGET_WORDS_BIGENDIAN)
+#define ESUFFIX _be
+#else
+#define ESUFFIX _le
+#endif
 #if DATA_SIZE == 8
 #define SUFFIX q
 #define USUFFIX q
+#define LSUFFIX q
+#define LUSUFFIX q
 #define DATA_TYPE uint64_t
 #elif DATA_SIZE == 4
 #define SUFFIX l
-#define USUFFIX l
+#define USUFFIX ul
+#define LSUFFIX l
+#define LUSUFFIX ul
+#if (TARGET_LONG_BITS == 64)
+#define DATA_STYPE int32_t
+#endif
 #define DATA_TYPE uint32_t
 #elif DATA_SIZE == 2
 #define SUFFIX w
 #define USUFFIX uw
+#define LSUFFIX w
+#define LUSUFFIX uw
 #define DATA_TYPE uint16_t
 #define DATA_STYPE int16_t
 #elif DATA_SIZE == 1
 #define SUFFIX b
 #define USUFFIX ub
+#define LSUFFIX b
+#define LUSUFFIX ub
+#define DATA_TYPE uint8_t
+#define DATA_STYPE int8_t
+#else
+#error unsupported data size
+#endif
+#else /* !defined(REVERSE_ENDIAN) */
+/* reverse-endian */
+#if defined(TARGET_WORDS_BIGENDIAN)
+#define ESUFFIX _le
+#else
+#define ESUFFIX _be
+#endif
+#if DATA_SIZE == 8
+#define SUFFIX qr
+#define USUFFIX qr
+#define LSUFFIX q
+#define LUSUFFIX q
+#define DATA_TYPE uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX lr
+#define USUFFIX ulr
+#define LSUFFIX l
+#define LUSUFFIX ul
+#if (TARGET_LONG_BITS == 64)
+#define DATA_STYPE int32_t
+#endif
+#define DATA_TYPE uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX wr
+#define USUFFIX uwr
+#define LSUFFIX w
+#define LUSUFFIX uw
+#define DATA_TYPE uint16_t
+#define DATA_STYPE int16_t
+#elif DATA_SIZE == 1
+#define SUFFIX br
+#define USUFFIX ubr
+#define LSUFFIX b
+#define LUSUFFIX ub
 #define DATA_TYPE uint8_t
 #define DATA_STYPE int8_t
 #else
 #error unsupported data size
 #endif
+#endif /* defined(REVERSE_ENDIAN) */
 
 #if ACCESS_TYPE < (NB_MMU_MODES)
 
@@ -121,7 +180,7 @@
     return res;
 }
 
-#if DATA_SIZE <= 2
+#if defined(DATA_STYPE)
 static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
 {
     int res;
@@ -244,7 +303,7 @@
     return res;
 }
 
-#if DATA_SIZE <= 2
+#if defined(DATA_STYPE)
 static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
 {
     int res, index;
@@ -293,8 +352,29 @@
 
 #endif /* !asm */
 
+/* BE/LE access routines */
+static inline RES_TYPE glue(glue(glue(ld, LUSUFFIX), ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(glue(ld, USUFFIX), MEMSUFFIX)(ptr);
+}
+
+#if defined(DATA_STYPE)
+static inline RES_TYPE glue(glue(glue(lds, LSUFFIX), ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(glue(lds, SUFFIX), MEMSUFFIX)(ptr);
+}
+#endif
+
+#if ACCESS_TYPE != 3
+static inline void glue(glue(glue(st, LSUFFIX), ESUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v)
+{
+    glue(glue(st, SUFFIX), MEMSUFFIX)(ptr, v);
+}
+#endif
+
 #if ACCESS_TYPE != (NB_MMU_MODES + 1)
 
+#if !defined(REVERSE_ENDIAN)
 #if DATA_SIZE == 8
 static inline float64 glue(ldfq, MEMSUFFIX)(target_ulong ptr)
 {
@@ -306,6 +386,11 @@
     return u.d;
 }
 
+static inline float64 glue(glue(ldfq, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldfq, MEMSUFFIX)(ptr);
+}
+
 static inline void glue(stfq, MEMSUFFIX)(target_ulong ptr, float64 v)
 {
     union {
@@ -315,6 +400,12 @@
     u.d = v;
     glue(stq, MEMSUFFIX)(ptr, u.i);
 }
+
+static inline void glue(glue(stfq, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                        float64 v)
+{
+    glue(stfq, MEMSUFFIX)(ptr, v);
+}
 #endif /* DATA_SIZE == 8 */
 
 #if DATA_SIZE == 4
@@ -324,10 +415,15 @@
         float32 f;
         uint32_t i;
     } u;
-    u.i = glue(ldl, MEMSUFFIX)(ptr);
+    u.i = glue(ldul, MEMSUFFIX)(ptr);
     return u.f;
 }
 
+static inline float32 glue(glue(ldfl, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldfl, MEMSUFFIX)(ptr);
+}
+
 static inline void glue(stfl, MEMSUFFIX)(target_ulong ptr, float32 v)
 {
     union {
@@ -337,8 +433,84 @@
     u.f = v;
     glue(stl, MEMSUFFIX)(ptr, u.i);
 }
+
+static inline void glue(glue(stfl, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                        float32 v)
+{
+    glue(stfl, MEMSUFFIX)(ptr, v);
+}
 #endif /* DATA_SIZE == 4 */
 
+#else /* defined(REVERSE_ENDIAN) */
+
+#if DATA_SIZE == 8
+static inline float64 glue(ldfqr, MEMSUFFIX)(target_ulong ptr)
+{
+    union {
+        float64 d;
+        uint64_t i;
+    } u;
+    u.i = glue(ldqr, MEMSUFFIX)(ptr);
+    return u.d;
+}
+
+static inline float64 glue(glue(ldfqr, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldfqr, MEMSUFFIX)(ptr);
+}
+
+static inline void glue(stfqr, MEMSUFFIX)(target_ulong ptr, float64 v)
+{
+    union {
+        float64 d;
+        uint64_t i;
+    } u;
+    u.d = v;
+    glue(stqr, MEMSUFFIX)(ptr, u.i);
+}
+
+static inline void glue(glue(stfqr, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                         float64 v)
+{
+    glue(stfqr, MEMSUFFIX)(ptr, v);
+}
+#endif /* DATA_SIZE == 8 */
+
+#if DATA_SIZE == 4
+static inline float32 glue(ldflr, MEMSUFFIX)(target_ulong ptr)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.i = glue(ldulr, MEMSUFFIX)(ptr);
+    return u.f;
+}
+
+static inline float32 glue(glue(ldflr, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldflr, MEMSUFFIX)(ptr);
+}
+
+static inline void glue(stflr, MEMSUFFIX)(target_ulong ptr, float32 v)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.f = v;
+    glue(stlr, MEMSUFFIX)(ptr, u.i);
+}
+
+static inline void glue(glue(stflr, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                         float32 v)
+{
+    glue(stflr, MEMSUFFIX)(ptr, v);
+}
+#endif /* DATA_SIZE == 4 */
+
+#endif /* defined(REVERSE_ENDIAN) */
+
 #endif /* ACCESS_TYPE != (NB_MMU_MODES + 1) */
 
 #undef RES_TYPE
@@ -346,7 +518,10 @@
 #undef DATA_STYPE
 #undef SUFFIX
 #undef USUFFIX
+#undef LSUFFIX
+#undef LUSUFFIX
 #undef DATA_SIZE
 #undef CPU_MMU_INDEX
 #undef MMUSUFFIX
+#undef ESUFFIX
 #undef ADDR_READ
Index: softmmu_template.h
===================================================================
--- softmmu_template.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ softmmu_template.h	2007-10-15 15:47:17.000000000 +0000
@@ -19,25 +19,66 @@
  */
 #define DATA_SIZE (1 << SHIFT)
 
+#if !defined(REVERSE_ENDIAN)
+/* native-endian */
 #if DATA_SIZE == 8
 #define SUFFIX q
 #define USUFFIX q
+#define RSUFFIX qr
+#define URSUFFIX qr
 #define DATA_TYPE uint64_t
 #elif DATA_SIZE == 4
 #define SUFFIX l
-#define USUFFIX l
+#define USUFFIX ul
+#define RSUFFIX lr
+#define URSUFFIX ulr
 #define DATA_TYPE uint32_t
 #elif DATA_SIZE == 2
 #define SUFFIX w
 #define USUFFIX uw
+#define RSUFFIX wr
+#define URSUFFIX uwr
 #define DATA_TYPE uint16_t
 #elif DATA_SIZE == 1
 #define SUFFIX b
 #define USUFFIX ub
+#define RSUFFIX br
+#define URSUFFIX ubr
 #define DATA_TYPE uint8_t
 #else
 #error unsupported data size
 #endif
+#else /* !defined(REVERSE_ENDIAN) */
+/* reverse-endian */
+#if DATA_SIZE == 8
+#define SUFFIX qr
+#define USUFFIX qr
+#define RSUFFIX q
+#define URSUFFIX q
+#define DATA_TYPE uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX lr
+#define USUFFIX ulr
+#define RSUFFIX l
+#define URSUFFIX ul
+#define DATA_TYPE uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX wr
+#define USUFFIX uwr
+#define RSUFFIX w
+#define URSUFFIX uw
+#define DATA_TYPE uint16_t
+#elif DATA_SIZE == 1
+#define SUFFIX br
+#define USUFFIX ubr
+#define RSUFFIX b
+#define URSUFFIX ub
+#define DATA_TYPE uint8_t
+#else
+#error unsupported data size
+#endif
+#endif /* defined(REVERSE_ENDIAN) */
+
 
 #ifdef SOFTMMU_CODE_ACCESS
 #define READ_ACCESS_TYPE 2
@@ -47,25 +88,64 @@
 #define ADDR_READ addr_read
 #endif
 
+#if (defined(TARGET_WORDS_BIGENDIAN) && !defined(REVERSE_ENDIAN)) || \
+    (!defined(TARGET_WORDS_BIGENDIAN) && defined(REVERSE_ENDIAN))
+#define ACCESS_WORDS_BIGENDIAN
+#endif
+
+/* Beware: we do not have reverse-endian accessors for IOs */
+#if 0
+defined(REVERSE_ENDIAN)
+#define DO_IOSWAP 1
+#if SHIFT == 1
+#define IOSWAP(val) bswap16(val)
+#elif SHIFT >= 2
+#define IOSWAP(val) bswap32(val)
+#else
+#define IOSWAP(val) (val)
+#endif
+#else
+#define DO_IOSWAP 0
+#define IOSWAP(val) (val)
+#endif
+
 static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
                                                         int mmu_idx,
                                                         void *retaddr);
 static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
-                                              target_ulong tlb_addr)
+                                              target_ulong tlb_addr,
+                                              int do_ioswap)
 {
     DATA_TYPE res;
+#if SHIFT > 2
+    uint32_t tmp;
+#endif
     int index;
 
     index = (tlb_addr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
 #if SHIFT <= 2
     res = io_mem_read[index][SHIFT](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        res = IOSWAP(res);
 #else
-#ifdef TARGET_WORDS_BIGENDIAN
-    res = (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr) << 32;
-    res |= io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+#ifdef ACCESS_WORDS_BIGENDIAN
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res = (uint64_t)tmp << 32;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res |= tmp;
 #else
-    res = io_mem_read[index][2](io_mem_opaque[index], physaddr);
-    res |= (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr + 4) << 32;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res = tmp;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res |= (uint64_t)tmp << 32;
 #endif
 #endif /* SHIFT > 2 */
 #ifdef USE_KQEMU
@@ -92,10 +172,34 @@
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[mmu_idx][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO read */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 1);
+                } else {
+                    /* Specific case for reverse endian page read */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages or IO) */
+                        goto do_unaligned_access;
+                    }
+#ifdef ALIGNED_ONLY
+                    if ((addr & (DATA_SIZE - 1)) != 0) {
+                        retaddr = GETPC();
+                        do_unaligned_access(addr, READ_ACCESS_TYPE,
+                                            mmu_idx, retaddr);
+                    }
+#endif
+                    res = glue(glue(ld, URSUFFIX), _raw)((uint8_t *)(long)physaddr);
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
             /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
@@ -144,10 +248,45 @@
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[mmu_idx][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO read */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 1);
+                } else {
+                    /* Specific case for reverse endian page read */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages) */
+                        addr1 = addr & ~(DATA_SIZE - 1);
+                        addr2 = addr1 + DATA_SIZE;
+                        res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr1,
+                                                                      mmu_idx,
+                                                                      retaddr);
+                        res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
+                                                                      mmu_idx,
+                                                                      retaddr);
+                        shift = (addr & (DATA_SIZE - 1)) * 8;
+#ifdef ACCESS_WORDS_BIGENDIAN
+                        res = (res1 >> shift) |
+                            (res2 << ((DATA_SIZE * 8) - shift));
+#else
+                        res = (res1 << shift) |
+                            (res2 >> ((DATA_SIZE * 8) - shift));
+#endif
+                        res = (DATA_TYPE)res;
+                    } else {
+                        /* unaligned/aligned access in the same page */
+                        res = glue(glue(ld, URSUFFIX), _raw)((uint8_t *)(long)physaddr);
+                    }
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* slow unaligned access (it spans two pages) */
@@ -158,7 +297,7 @@
             res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
                                                           mmu_idx, retaddr);
             shift = (addr & (DATA_SIZE - 1)) * 8;
-#ifdef TARGET_WORDS_BIGENDIAN
+#ifdef ACCESS_WORDS_BIGENDIAN
             res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
 #else
             res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift));
@@ -186,22 +325,39 @@
 static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr,
                                           DATA_TYPE val,
                                           target_ulong tlb_addr,
-                                          void *retaddr)
+                                          void *retaddr, int do_ioswap)
 {
+#if SHIFT > 2
+    uint32_t tmp;
+#endif
     int index;
 
     index = (tlb_addr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
     env->mem_write_vaddr = tlb_addr;
     env->mem_write_pc = (unsigned long)retaddr;
 #if SHIFT <= 2
+    if (do_ioswap != DO_IOSWAP)
+        val = IOSWAP(val);
     io_mem_write[index][SHIFT](io_mem_opaque[index], physaddr, val);
 #else
-#ifdef TARGET_WORDS_BIGENDIAN
-    io_mem_write[index][2](io_mem_opaque[index], physaddr, val >> 32);
-    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val);
+#ifdef ACCESS_WORDS_BIGENDIAN
+    tmp = val >> 32;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr, tmp);
+    tmp = val;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, tmp);
 #else
-    io_mem_write[index][2](io_mem_opaque[index], physaddr, val);
-    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val >> 32);
+    tmp = val;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr, tmp);
+    tmp = val >> 32;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, tmp);
 #endif
 #endif /* SHIFT > 2 */
 #ifdef USE_KQEMU
@@ -224,12 +380,37 @@
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[mmu_idx][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            retaddr = GETPC();
-            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO write */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    retaddr = GETPC();
+                    glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr,
+                                           1);
+                } else {
+                    /* Specific case for reverse endian page write */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        goto do_unaligned_access;
+                    }
+#ifdef ALIGNED_ONLY
+                    if ((addr & (DATA_SIZE - 1)) != 0) {
+                        retaddr = GETPC();
+                        do_unaligned_access(addr, 1, mmu_idx, retaddr);
+                    }
+#endif
+                    glue(glue(st, RSUFFIX), _raw)((uint8_t *)(long)physaddr, val);
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                retaddr = GETPC();
+                glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
+            /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
@@ -275,15 +456,48 @@
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[mmu_idx][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO write */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr,
+                                           1);
+                } else {
+                    /* Specific case for reverse endian page write */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages or IO) */
+                        /* XXX: not efficient, but simple */
+                        for(i = 0;i < DATA_SIZE; i++) {
+#ifdef ACCESS_WORDS_BIGENDIAN
+                            glue(slow_stb, MMUSUFFIX)(addr + i, val >> (i * 8),
+                                                      mmu_idx, retaddr);
+#else
+                            glue(slow_stb, MMUSUFFIX)(addr + i,
+                                                      val >> (((DATA_SIZE - 1) * 8)
+                                                              - (i * 8)),
+                                                      mmu_idx, retaddr);
+#endif
+                        }
+
+                    } else {
+                        /* aligned/unaligned access in the same page */
+                        glue(glue(st, RSUFFIX), _raw)((uint8_t *)(long)physaddr,
+                                                      val);
+                    }
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* XXX: not efficient, but simple */
             for(i = 0;i < DATA_SIZE; i++) {
-#ifdef TARGET_WORDS_BIGENDIAN
+#ifdef ACCESS_WORDS_BIGENDIAN
                 glue(slow_stb, MMUSUFFIX)(addr + i, val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
                                           mmu_idx, retaddr);
 #else
@@ -304,10 +518,15 @@
 
 #endif /* !defined(SOFTMMU_CODE_ACCESS) */
 
+#undef DO_IOSWAP
+#undef IOSWAP
+#undef ACCESS_WORDS_BIGENDIAN
 #undef READ_ACCESS_TYPE
 #undef SHIFT
 #undef DATA_TYPE
 #undef SUFFIX
 #undef USUFFIX
+#undef RSUFFIX
+#undef URSUFFIX
 #undef DATA_SIZE
 #undef ADDR_READ
Index: hw/eepro100.c
===================================================================
--- hw/eepro100.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ hw/eepro100.c	2007-10-15 15:41:10.000000000 +0000
@@ -723,7 +723,7 @@
             uint32_t tbd_address = cb_address + 0x10;
             assert(tcb_bytes <= sizeof(buf));
             while (size < tcb_bytes) {
-                uint32_t tx_buffer_address = ldl_phys(tbd_address);
+                uint32_t tx_buffer_address = ldul_phys(tbd_address);
                 uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
                 //~ uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
                 tbd_address += 8;
@@ -743,7 +743,7 @@
                     /* Extended TCB. */
                     assert(tcb_bytes == 0);
                     for (; tbd_count < 2; tbd_count++) {
-                        uint32_t tx_buffer_address = ldl_phys(tbd_address);
+                        uint32_t tx_buffer_address = ldul_phys(tbd_address);
                         uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
                         uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
                         tbd_address += 8;
@@ -760,7 +760,7 @@
                 }
                 tbd_address = tbd_array;
                 for (; tbd_count < tx.tbd_count; tbd_count++) {
-                    uint32_t tx_buffer_address = ldl_phys(tbd_address);
+                    uint32_t tx_buffer_address = ldul_phys(tbd_address);
                     uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
                     uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
                     tbd_address += 8;
Index: hw/pc.c
===================================================================
--- hw/pc.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ hw/pc.c	2007-10-15 15:41:10.000000000 +0000
@@ -477,8 +477,8 @@
     }
 
     /* kernel protocol version */
-    fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202));
-    if (ldl_p(header+0x202) == 0x53726448)
+    fprintf(stderr, "header magic: %#x\n", ldul_p(header+0x202));
+    if (ldul_p(header+0x202) == 0x53726448)
 	protocol = lduw_p(header+0x206);
     else
 	protocol = 0;
@@ -510,7 +510,7 @@
 
     /* highest address for loading the initrd */
     if (protocol >= 0x203)
-	initrd_max = ldl_p(header+0x22c);
+	initrd_max = ldul_p(header+0x22c);
     else
 	initrd_max = 0x37ffffff;
 
Index: hw/pl080.c
===================================================================
--- hw/pl080.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ hw/pl080.c	2007-10-15 15:41:10.000000000 +0000
@@ -162,10 +162,10 @@
             if (size == 0) {
                 /* Transfer complete.  */
                 if (ch->lli) {
-                    ch->src = ldl_phys(ch->lli);
-                    ch->dest = ldl_phys(ch->lli + 4);
-                    ch->ctrl = ldl_phys(ch->lli + 12);
-                    ch->lli = ldl_phys(ch->lli + 8);
+                    ch->src = ldul_phys(ch->lli);
+                    ch->dest = ldul_phys(ch->lli + 4);
+                    ch->ctrl = ldul_phys(ch->lli + 12);
+                    ch->lli = ldul_phys(ch->lli + 8);
                 } else {
                     ch->conf &= ~PL080_CCONF_E;
                 }
Index: hw/sun4m.c
===================================================================
--- hw/sun4m.c.orig	2007-10-14 16:27:39.000000000 +0000
+++ hw/sun4m.c	2007-10-15 15:41:10.000000000 +0000
@@ -465,7 +465,7 @@
         }
         if (initrd_size > 0) {
             for (i = 0; i < 64 * TARGET_PAGE_SIZE; i += TARGET_PAGE_SIZE) {
-                if (ldl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
+                if (ldul_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
                     == 0x48647253) { // HdrS
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 16, INITRD_LOAD_ADDR);
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 20, initrd_size);
Index: hw/sun4u.c
===================================================================
--- hw/sun4u.c.orig	2007-10-14 16:27:39.000000000 +0000
+++ hw/sun4u.c	2007-10-15 15:41:10.000000000 +0000
@@ -418,7 +418,7 @@
         }
         if (initrd_size > 0) {
             for (i = 0; i < 64 * TARGET_PAGE_SIZE; i += TARGET_PAGE_SIZE) {
-                if (ldl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
+                if (ldul_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
                     == 0x48647253) { // HdrS
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 16, INITRD_LOAD_ADDR);
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 20, initrd_size);
Index: linux-user/elfload.c
===================================================================
--- linux-user/elfload.c.orig	2007-10-14 16:22:00.000000000 +0000
+++ linux-user/elfload.c	2007-10-15 15:41:58.000000000 +0000
@@ -346,7 +346,7 @@
     pos += sizeof(abi_ulong);
     _regs->gpr[4] = pos;
     for (tmp = 1; tmp != 0; pos += sizeof(abi_ulong))
-        tmp = ldl(pos);
+        tmp = ldul(pos);
     _regs->gpr[5] = pos;
 }
 
Index: linux-user/qemu.h
===================================================================
--- linux-user/qemu.h.orig	2007-10-14 16:22:00.000000000 +0000
+++ linux-user/qemu.h	2007-10-15 15:41:10.000000000 +0000
@@ -323,7 +323,7 @@
 #define tput8(addr, val) stb(addr, val)
 #define tget16(addr) lduw(addr)
 #define tput16(addr, val) stw(addr, val)
-#define tget32(addr) ldl(addr)
+#define tget32(addr) ldul(addr)
 #define tput32(addr, val) stl(addr, val)
 #define tget64(addr) ldq(addr)
 #define tput64(addr, val) stq(addr, val)
@@ -331,7 +331,7 @@
 #define tgetl(addr) ldq(addr)
 #define tputl(addr, val) stq(addr, val)
 #else
-#define tgetl(addr) ldl(addr)
+#define tgetl(addr) ldul(addr)
 #define tputl(addr, val) stl(addr, val)
 #endif
 
Index: linux-user/signal.c
===================================================================
--- linux-user/signal.c.orig	2007-10-14 16:22:00.000000000 +0000
+++ linux-user/signal.c	2007-10-15 15:41:10.000000000 +0000
@@ -878,28 +878,28 @@
         cpu_x86_load_seg(env, R_ES, lduw(&sc->es));
         cpu_x86_load_seg(env, R_DS, lduw(&sc->ds));
 
-        env->regs[R_EDI] = ldl(&sc->edi);
-        env->regs[R_ESI] = ldl(&sc->esi);
-        env->regs[R_EBP] = ldl(&sc->ebp);
-        env->regs[R_ESP] = ldl(&sc->esp);
-        env->regs[R_EBX] = ldl(&sc->ebx);
-        env->regs[R_EDX] = ldl(&sc->edx);
-        env->regs[R_ECX] = ldl(&sc->ecx);
-        env->eip = ldl(&sc->eip);
+        env->regs[R_EDI] = ldul(&sc->edi);
+        env->regs[R_ESI] = ldul(&sc->esi);
+        env->regs[R_EBP] = ldul(&sc->ebp);
+        env->regs[R_ESP] = ldul(&sc->esp);
+        env->regs[R_EBX] = ldul(&sc->ebx);
+        env->regs[R_EDX] = ldul(&sc->edx);
+        env->regs[R_ECX] = ldul(&sc->ecx);
+        env->eip = ldul(&sc->eip);
 
         cpu_x86_load_seg(env, R_CS, lduw(&sc->cs) | 3);
         cpu_x86_load_seg(env, R_SS, lduw(&sc->ss) | 3);
 
 	{
 		unsigned int tmpflags;
-                tmpflags = ldl(&sc->eflags);
+                tmpflags = ldul(&sc->eflags);
 		env->eflags = (env->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
                 //		regs->orig_eax = -1;		/* disable syscall checks */
 	}
 
 	{
 		struct _fpstate * buf;
-                buf = (void *)ldl(&sc->fpstate);
+                buf = (void *)ldul(&sc->fpstate);
 		if (buf) {
 #if 0
 			if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
@@ -909,7 +909,7 @@
 		}
 	}
 
-        *peax = ldl(&sc->eax);
+        *peax = ldul(&sc->eax);
 	return err;
 #if 0
 badframe:
Index: linux-user/vm86.c
===================================================================
--- linux-user/vm86.c.orig	2007-10-14 16:22:00.000000000 +0000
+++ linux-user/vm86.c	2007-10-15 15:41:10.000000000 +0000
@@ -56,7 +56,7 @@
 
 static inline unsigned int vm_getl(uint8_t *segptr, unsigned int reg16)
 {
-    return ldl(segptr + (reg16 & 0xffff));
+    return ldul(segptr + (reg16 & 0xffff));
 }
 
 void save_v86_state(CPUX86State *env)
Index: target-alpha/exec.h
===================================================================
--- target-alpha/exec.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-alpha/exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -62,6 +62,9 @@
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 static inline void env_to_regs(void)
Index: target-alpha/helper.c
===================================================================
--- target-alpha/helper.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-alpha/helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -69,7 +69,7 @@
             env->exception_index = EXCP_DTB_MISS_PAL;
         else
             env->exception_index = EXCP_DTB_MISS_NATIVE;
-        opc = (ldl_code(env->pc) >> 21) << 4;
+        opc = (ldul_code(env->pc) >> 21) << 4;
         if (rw) {
             opc |= 0x9;
         } else {
Index: target-alpha/op_helper.c
===================================================================
--- target-alpha/op_helper.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-alpha/op_helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -1213,6 +1213,7 @@
 
 #define MMUSUFFIX _mmu
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -1225,6 +1226,21 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
Index: target-alpha/op_mem.h
===================================================================
--- target-alpha/op_mem.h.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-alpha/op_mem.h	2007-10-15 15:41:10.000000000 +0000
@@ -30,7 +30,7 @@
 {
     env->lock = EA;
 
-    return glue(ldl, MEMSUFFIX)(EA);
+    return glue(ldul, MEMSUFFIX)(EA);
 }
 
 static inline uint32_t glue(ldq_l, MEMSUFFIX) (target_ulong EA)
@@ -82,7 +82,7 @@
 ALPHA_ST_OP(b, stb);
 ALPHA_LD_OP(wu, lduw);
 ALPHA_ST_OP(w, stw);
-ALPHA_LD_OP(l, ldl);
+ALPHA_LD_OP(l, ldul);
 ALPHA_ST_OP(l, stl);
 ALPHA_LD_OP(q, ldq);
 ALPHA_ST_OP(q, stq);
Index: target-alpha/translate.c
===================================================================
--- target-alpha/translate.c.orig	2007-10-14 16:44:20.000000000 +0000
+++ target-alpha/translate.c	2007-10-15 15:41:10.000000000 +0000
@@ -2010,7 +2010,7 @@
                     ctx.pc, ctx.mem_idx);
         }
 #endif
-        insn = ldl_code(ctx.pc);
+        insn = ldul_code(ctx.pc);
 #if defined ALPHA_DEBUG_DISAS
         insn_count++;
         if (logfile != NULL) {
Index: target-arm/exec.h
===================================================================
--- target-arm/exec.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-arm/exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -64,6 +64,9 @@
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 /* In op_helper.c */
Index: target-arm/helper.c
===================================================================
--- target-arm/helper.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-arm/helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -297,7 +297,7 @@
             if (env->thumb) {
                 mask = lduw_code(env->regs[15] - 2) & 0xff;
             } else {
-                mask = ldl_code(env->regs[15] - 4) & 0xffffff;
+                mask = ldul_code(env->regs[15] - 4) & 0xffffff;
             }
             /* Only intercept calls from privileged modes, to provide some
                semblance of security.  */
@@ -473,7 +473,7 @@
         /* Pagetable walk.  */
         /* Lookup l1 descriptor.  */
         table = (env->cp15.c2_base & 0xffffc000) | ((address >> 18) & 0x3ffc);
-        desc = ldl_phys(table);
+        desc = ldul_phys(table);
         type = (desc & 3);
         domain = (env->cp15.c3 >> ((desc >> 4) & 0x1e)) & 3;
         if (type == 0) {
@@ -502,7 +502,7 @@
                 /* Fine pagetable.  */
                 table = (desc & 0xfffff000) | ((address >> 8) & 0xffc);
             }
-            desc = ldl_phys(table);
+            desc = ldul_phys(table);
             switch (desc & 3) {
             case 0: /* Page translation fault.  */
                 code = 7;
Index: target-arm/op_helper.c
===================================================================
--- target-arm/op_helper.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-arm/op_helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -180,6 +180,7 @@
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -192,6 +193,21 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
Index: target-arm/op_mem.h
===================================================================
--- target-arm/op_mem.h.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-arm/op_mem.h	2007-10-15 15:41:10.000000000 +0000
@@ -1,18 +1,17 @@
 /* ARM memory operations.  */
 
-/* Load from address T1 into T0.  */
-#define MEM_LD_OP(name) \
+#define MEM_LD_OP(name, lname) \
 void OPPROTO glue(op_ld##name,MEMSUFFIX)(void) \
 { \
-    T0 = glue(ld##name,MEMSUFFIX)(T1); \
+    T0 = glue(ld##lname,MEMSUFFIX)(T1); \
     FORCE_RET(); \
 }
 
-MEM_LD_OP(ub)
-MEM_LD_OP(sb)
-MEM_LD_OP(uw)
-MEM_LD_OP(sw)
-MEM_LD_OP(l)
+MEM_LD_OP(ub,ub)
+MEM_LD_OP(sb,sb)
+MEM_LD_OP(uw,uw)
+MEM_LD_OP(sw,sw)
+MEM_LD_OP(l,ul)
 
 #undef MEM_LD_OP
 
@@ -45,7 +44,7 @@
 }
 
 MEM_SWP_OP(b, ub)
-MEM_SWP_OP(l, l)
+MEM_SWP_OP(l, ul)
 
 #undef MEM_SWP_OP
 
@@ -82,7 +81,7 @@
 
 MMX_MEM_OP(b, ub)
 MMX_MEM_OP(w, uw)
-MMX_MEM_OP(l, l)
+MMX_MEM_OP(l, ul)
 MMX_MEM_OP(q, q)
 
 #undef MMX_MEM_OP
Index: target-arm/translate.c
===================================================================
--- target-arm/translate.c.orig	2007-10-14 16:44:20.000000000 +0000
+++ target-arm/translate.c	2007-10-15 15:41:10.000000000 +0000
@@ -2206,7 +2206,7 @@
 {
     unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
 
-    insn = ldl_code(s->pc);
+    insn = ldul_code(s->pc);
     s->pc += 4;
 
     cond = insn >> 28;
Index: target-cris/exec.h
===================================================================
--- target-cris/exec.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-cris/exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -50,6 +50,9 @@
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 void cpu_cris_flush_flags(CPUCRISState *env, int cc_op);
Index: target-cris/helper.c
===================================================================
--- target-cris/helper.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-cris/helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -106,7 +106,7 @@
 //			printf ("BREAK! %d\n", env->trapnr);
 			irqnum = env->trapnr;
 			ebp = env->pregs[SR_EBP];
-			isr = ldl_code(ebp + irqnum * 4);
+			isr = ldul_code(ebp + irqnum * 4);
 			env->pregs[SR_ERP] = env->pc + 2;
 			env->pc = isr;
 
@@ -117,7 +117,7 @@
 //			printf ("MMU miss\n");
 			irqnum = 4;
 			ebp = env->pregs[SR_EBP];
-			isr = ldl_code(ebp + irqnum * 4);
+			isr = ldul_code(ebp + irqnum * 4);
 			env->pregs[SR_ERP] = env->pc;
 			env->pc = isr;
 			cris_shift_ccs(env);
@@ -138,7 +138,7 @@
 					__builtin_clz(env->pending_interrupts);
 				irqnum += 0x30;
 				ebp = env->pregs[SR_EBP];
-				isr = ldl_code(ebp + irqnum * 4);
+				isr = ldul_code(ebp + irqnum * 4);
 				env->pregs[SR_ERP] = env->pc;
 				env->pc = isr;
 
Index: target-cris/op_helper.c
===================================================================
--- target-cris/op_helper.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-cris/op_helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -25,6 +25,7 @@
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -37,6 +38,21 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
Index: target-cris/op_mem.c
===================================================================
--- target-cris/op_mem.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-cris/op_mem.c	2007-10-15 15:41:10.000000000 +0000
@@ -49,7 +49,7 @@
 }
 
 void glue(op_ldl_T0_T0, MEMSUFFIX) (void) {
-    T0 = glue(ldl, MEMSUFFIX) (T0);
+    T0 = glue(ldul, MEMSUFFIX) (T0);
     RETURN();
 }
 
Index: target-cris/translate.c
===================================================================
--- target-cris/translate.c.orig	2007-10-14 16:44:20.000000000 +0000
+++ target-cris/translate.c	2007-10-15 15:41:10.000000000 +0000
@@ -828,7 +828,7 @@
 		if (memsize == 1)
 			insn_len++;
 
-		imm = ldl_code(dc->pc + 2);
+		imm = ldul_code(dc->pc + 2);
 		if (memsize != 4) {
 			if (s_ext) {
 				imm = sign_extend(imm, (memsize * 8) - 1);
@@ -1962,7 +1962,7 @@
 	rd = dc->op2;
 
 	cris_cc_mask(dc, 0);
-	imm = ldl_code(dc->pc + 2);
+	imm = ldul_code(dc->pc + 2);
 	DIS(fprintf (logfile, "lapc 0x%x, $r%u\n", imm + dc->pc, dc->op2));
 	gen_op_movl_T0_im (dc->pc + imm);
 	gen_movl_reg_T0[rd] ();
@@ -1999,7 +1999,7 @@
 {
 	uint32_t imm;
 
-	imm = ldl_code(dc->pc + 2);
+	imm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "jas 0x%x\n", imm));
 	cris_cc_mask(dc, 0);
@@ -2016,7 +2016,7 @@
 {
 	uint32_t imm;
 
-	imm = ldl_code(dc->pc + 2);
+	imm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "jasc 0x%x\n", imm));
 	cris_cc_mask(dc, 0);
@@ -2047,7 +2047,7 @@
 	int32_t offset;
 	uint32_t cond = dc->op2;
 
-	offset = ldl_code(dc->pc + 2);
+	offset = ldul_code(dc->pc + 2);
 	offset = sign_extend(offset, 15);
 
 	DIS(fprintf (logfile, "b%s %d pc=%x dst=%x\n",
@@ -2065,7 +2065,7 @@
 	int32_t simm;
 
 
-	simm = ldl_code(dc->pc + 2);
+	simm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "bas 0x%x, $p%u\n", dc->pc + simm, dc->op2));
 	cris_cc_mask(dc, 0);
@@ -2081,7 +2081,7 @@
 static unsigned int dec_basc_im(DisasContext *dc)
 {
 	int32_t simm;
-	simm = ldl_code(dc->pc + 2);
+	simm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "basc 0x%x, $p%u\n", dc->pc + simm, dc->op2));
 	cris_cc_mask(dc, 0);
@@ -2259,7 +2259,7 @@
 	int i;
 
 	/* Load a halfword onto the instruction register.  */
-	tmp = ldl_code(dc->pc);
+	tmp = ldul_code(dc->pc);
 	dc->ir = tmp & 0xffff;
 
 	/* Now decode it.  */
Index: target-i386/exec.h
===================================================================
--- target-i386/exec.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-i386/exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -217,6 +217,9 @@
 #if !defined(CONFIG_USER_ONLY)
 
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 
 static inline double ldfq(target_ulong ptr)
 {
@@ -244,7 +247,7 @@
         float f;
         uint32_t i;
     } u;
-    u.i = ldl(ptr);
+    u.i = ldul(ptr);
     return u.f;
 }
 
@@ -419,12 +422,12 @@
 
 static inline CPU86_LDouble helper_fldt(target_ulong ptr)
 {
-    return *(CPU86_LDouble *)ptr;
+    return *(CPU86_LDouble *)(unsigned long)ptr;
 }
 
 static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr)
 {
-    *(CPU86_LDouble *)ptr = f;
+    *(CPU86_LDouble *)(unsigned long)ptr = f;
 }
 
 #else
Index: target-i386/helper.c
===================================================================
--- target-i386/helper.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-i386/helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -122,8 +122,8 @@
     if ((index + 7) > dt->limit)
         return -1;
     ptr = dt->base + index;
-    *e1_ptr = ldl_kernel(ptr);
-    *e2_ptr = ldl_kernel(ptr + 4);
+    *e1_ptr = ldul_kernel(ptr);
+    *e2_ptr = ldul_kernel(ptr + 4);
     return 0;
 }
 
@@ -186,7 +186,7 @@
         *esp_ptr = lduw_kernel(env->tr.base + index);
         *ss_ptr = lduw_kernel(env->tr.base + index + 2);
     } else {
-        *esp_ptr = ldl_kernel(env->tr.base + index);
+        *esp_ptr = ldul_kernel(env->tr.base + index);
         *ss_ptr = lduw_kernel(env->tr.base + index + 4);
     }
 }
@@ -302,15 +302,15 @@
     /* read all the registers from the new TSS */
     if (type & 8) {
         /* 32 bit */
-        new_cr3 = ldl_kernel(tss_base + 0x1c);
-        new_eip = ldl_kernel(tss_base + 0x20);
-        new_eflags = ldl_kernel(tss_base + 0x24);
+        new_cr3 = ldul_kernel(tss_base + 0x1c);
+        new_eip = ldul_kernel(tss_base + 0x20);
+        new_eflags = ldul_kernel(tss_base + 0x24);
         for(i = 0; i < 8; i++)
-            new_regs[i] = ldl_kernel(tss_base + (0x28 + i * 4));
+            new_regs[i] = ldul_kernel(tss_base + (0x28 + i * 4));
         for(i = 0; i < 6; i++)
             new_segs[i] = lduw_kernel(tss_base + (0x48 + i * 4));
         new_ldt = lduw_kernel(tss_base + 0x60);
-        new_trap = ldl_kernel(tss_base + 0x64);
+        new_trap = ldul_kernel(tss_base + 0x64);
     } else {
         /* 16 bit */
         new_cr3 = 0;
@@ -341,7 +341,7 @@
         target_ulong ptr;
         uint32_t e2;
         ptr = env->gdt.base + (env->tr.selector & ~7);
-        e2 = ldl_kernel(ptr + 4);
+        e2 = ldul_kernel(ptr + 4);
         e2 &= ~DESC_TSS_BUSY_MASK;
         stl_kernel(ptr + 4, e2);
     }
@@ -393,7 +393,7 @@
         target_ulong ptr;
         uint32_t e2;
         ptr = env->gdt.base + (tss_selector & ~7);
-        e2 = ldl_kernel(ptr + 4);
+        e2 = ldul_kernel(ptr + 4);
         e2 |= DESC_TSS_BUSY_MASK;
         stl_kernel(ptr + 4, e2);
     }
@@ -456,8 +456,8 @@
         if ((index + 7) > dt->limit)
             raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
         if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
             raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
         if (!(e2 & DESC_P_MASK))
@@ -580,7 +580,7 @@
 
 #define POPL(ssp, sp, sp_mask, val)\
 {\
-    val = (uint32_t)ldl_kernel((ssp) + (sp & (sp_mask)));\
+    val = (uint32_t)ldul_kernel((ssp) + (sp & (sp_mask)));\
     sp += 4;\
 }
 
@@ -629,8 +629,8 @@
     if (intno * 8 + 7 > dt->limit)
         raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
     ptr = dt->base + intno * 8;
-    e1 = ldl_kernel(ptr);
-    e2 = ldl_kernel(ptr + 4);
+    e1 = ldul_kernel(ptr);
+    e2 = ldul_kernel(ptr + 4);
     /* check gate type */
     type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
     switch(type) {
@@ -875,9 +875,9 @@
     if (intno * 16 + 15 > dt->limit)
         raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
     ptr = dt->base + intno * 16;
-    e1 = ldl_kernel(ptr);
-    e2 = ldl_kernel(ptr + 4);
-    e3 = ldl_kernel(ptr + 8);
+    e1 = ldul_kernel(ptr);
+    e2 = ldul_kernel(ptr + 4);
+    e3 = ldul_kernel(ptr + 8);
     /* check gate type */
     type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
     switch(type) {
@@ -1147,7 +1147,7 @@
 
     dt = &env->idt;
     ptr = dt->base + (intno * 8);
-    e2 = ldl_kernel(ptr + 4);
+    e2 = ldul_kernel(ptr + 4);
 
     dpl = (e2 >> DESC_DPL_SHIFT) & 3;
     cpl = env->hflags & HF_CPL_MASK;
@@ -1469,24 +1469,24 @@
         cpu_x86_load_seg_cache(env, i,
                                lduw_phys(sm_state + offset),
                                ldq_phys(sm_state + offset + 8),
-                               ldl_phys(sm_state + offset + 4),
+                               ldul_phys(sm_state + offset + 4),
                                (lduw_phys(sm_state + offset + 2) & 0xf0ff) << 8);
     }
 
     env->gdt.base = ldq_phys(sm_state + 0x7e68);
-    env->gdt.limit = ldl_phys(sm_state + 0x7e64);
+    env->gdt.limit = ldul_phys(sm_state + 0x7e64);
 
     env->ldt.selector = lduw_phys(sm_state + 0x7e70);
     env->ldt.base = ldq_phys(sm_state + 0x7e78);
-    env->ldt.limit = ldl_phys(sm_state + 0x7e74);
+    env->ldt.limit = ldul_phys(sm_state + 0x7e74);
     env->ldt.flags = (lduw_phys(sm_state + 0x7e72) & 0xf0ff) << 8;
 
     env->idt.base = ldq_phys(sm_state + 0x7e88);
-    env->idt.limit = ldl_phys(sm_state + 0x7e84);
+    env->idt.limit = ldul_phys(sm_state + 0x7e84);
 
     env->tr.selector = lduw_phys(sm_state + 0x7e90);
     env->tr.base = ldq_phys(sm_state + 0x7e98);
-    env->tr.limit = ldl_phys(sm_state + 0x7e94);
+    env->tr.limit = ldul_phys(sm_state + 0x7e94);
     env->tr.flags = (lduw_phys(sm_state + 0x7e92) & 0xf0ff) << 8;
 
     EAX = ldq_phys(sm_state + 0x7ff8);
@@ -1500,51 +1500,51 @@
     for(i = 8; i < 16; i++)
         env->regs[i] = ldq_phys(sm_state + 0x7ff8 - i * 8);
     env->eip = ldq_phys(sm_state + 0x7f78);
-    load_eflags(ldl_phys(sm_state + 0x7f70),
+    load_eflags(ldul_phys(sm_state + 0x7f70),
                 ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    env->dr[6] = ldl_phys(sm_state + 0x7f68);
-    env->dr[7] = ldl_phys(sm_state + 0x7f60);
+    env->dr[6] = ldul_phys(sm_state + 0x7f68);
+    env->dr[7] = ldul_phys(sm_state + 0x7f60);
 
-    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f48));
-    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7f50));
-    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7f58));
+    cpu_x86_update_cr4(env, ldul_phys(sm_state + 0x7f48));
+    cpu_x86_update_cr3(env, ldul_phys(sm_state + 0x7f50));
+    cpu_x86_update_cr0(env, ldul_phys(sm_state + 0x7f58));
 
-    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+    val = ldul_phys(sm_state + 0x7efc); /* revision ID */
     if (val & 0x20000) {
-        env->smbase = ldl_phys(sm_state + 0x7f00) & ~0x7fff;
+        env->smbase = ldul_phys(sm_state + 0x7f00) & ~0x7fff;
     }
 #else
-    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7ffc));
-    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7ff8));
-    load_eflags(ldl_phys(sm_state + 0x7ff4),
+    cpu_x86_update_cr0(env, ldul_phys(sm_state + 0x7ffc));
+    cpu_x86_update_cr3(env, ldul_phys(sm_state + 0x7ff8));
+    load_eflags(ldul_phys(sm_state + 0x7ff4),
                 ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    env->eip = ldl_phys(sm_state + 0x7ff0);
-    EDI = ldl_phys(sm_state + 0x7fec);
-    ESI = ldl_phys(sm_state + 0x7fe8);
-    EBP = ldl_phys(sm_state + 0x7fe4);
-    ESP = ldl_phys(sm_state + 0x7fe0);
-    EBX = ldl_phys(sm_state + 0x7fdc);
-    EDX = ldl_phys(sm_state + 0x7fd8);
-    ECX = ldl_phys(sm_state + 0x7fd4);
-    EAX = ldl_phys(sm_state + 0x7fd0);
-    env->dr[6] = ldl_phys(sm_state + 0x7fcc);
-    env->dr[7] = ldl_phys(sm_state + 0x7fc8);
-
-    env->tr.selector = ldl_phys(sm_state + 0x7fc4) & 0xffff;
-    env->tr.base = ldl_phys(sm_state + 0x7f64);
-    env->tr.limit = ldl_phys(sm_state + 0x7f60);
-    env->tr.flags = (ldl_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
-
-    env->ldt.selector = ldl_phys(sm_state + 0x7fc0) & 0xffff;
-    env->ldt.base = ldl_phys(sm_state + 0x7f80);
-    env->ldt.limit = ldl_phys(sm_state + 0x7f7c);
-    env->ldt.flags = (ldl_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
+    env->eip = ldul_phys(sm_state + 0x7ff0);
+    EDI = ldul_phys(sm_state + 0x7fec);
+    ESI = ldul_phys(sm_state + 0x7fe8);
+    EBP = ldul_phys(sm_state + 0x7fe4);
+    ESP = ldul_phys(sm_state + 0x7fe0);
+    EBX = ldul_phys(sm_state + 0x7fdc);
+    EDX = ldul_phys(sm_state + 0x7fd8);
+    ECX = ldul_phys(sm_state + 0x7fd4);
+    EAX = ldul_phys(sm_state + 0x7fd0);
+    env->dr[6] = ldul_phys(sm_state + 0x7fcc);
+    env->dr[7] = ldul_phys(sm_state + 0x7fc8);
+
+    env->tr.selector = ldul_phys(sm_state + 0x7fc4) & 0xffff;
+    env->tr.base = ldul_phys(sm_state + 0x7f64);
+    env->tr.limit = ldul_phys(sm_state + 0x7f60);
+    env->tr.flags = (ldul_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
+
+    env->ldt.selector = ldul_phys(sm_state + 0x7fc0) & 0xffff;
+    env->ldt.base = ldul_phys(sm_state + 0x7f80);
+    env->ldt.limit = ldul_phys(sm_state + 0x7f7c);
+    env->ldt.flags = (ldul_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
 
-    env->gdt.base = ldl_phys(sm_state + 0x7f74);
-    env->gdt.limit = ldl_phys(sm_state + 0x7f70);
+    env->gdt.base = ldul_phys(sm_state + 0x7f74);
+    env->gdt.limit = ldul_phys(sm_state + 0x7f70);
 
-    env->idt.base = ldl_phys(sm_state + 0x7f58);
-    env->idt.limit = ldl_phys(sm_state + 0x7f54);
+    env->idt.base = ldul_phys(sm_state + 0x7f58);
+    env->idt.limit = ldul_phys(sm_state + 0x7f54);
 
     for(i = 0; i < 6; i++) {
         if (i < 3)
@@ -1552,16 +1552,16 @@
         else
             offset = 0x7f2c + (i - 3) * 12;
         cpu_x86_load_seg_cache(env, i,
-                               ldl_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
-                               ldl_phys(sm_state + offset + 8),
-                               ldl_phys(sm_state + offset + 4),
-                               (ldl_phys(sm_state + offset) & 0xf0ff) << 8);
+                               ldul_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
+                               ldul_phys(sm_state + offset + 8),
+                               ldul_phys(sm_state + offset + 4),
+                               (ldul_phys(sm_state + offset) & 0xf0ff) << 8);
     }
-    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f14));
+    cpu_x86_update_cr4(env, ldul_phys(sm_state + 0x7f14));
 
-    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+    val = ldul_phys(sm_state + 0x7efc); /* revision ID */
     if (val & 0x20000) {
-        env->smbase = ldl_phys(sm_state + 0x7ef8) & ~0x7fff;
+        env->smbase = ldul_phys(sm_state + 0x7ef8) & ~0x7fff;
     }
 #endif
     CC_OP = CC_OP_EFLAGS;
@@ -1761,7 +1761,7 @@
         while (--level) {
             esp -= 4;
             ebp -= 4;
-            stl(ssp + (esp & esp_mask), ldl(ssp + (ebp & esp_mask)));
+            stl(ssp + (esp & esp_mask), ldul(ssp + (ebp & esp_mask)));
         }
         esp -= 4;
         stl(ssp + (esp & esp_mask), T1);
@@ -1836,8 +1836,8 @@
         if ((index + entry_limit) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
         if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         if (!(e2 & DESC_P_MASK))
@@ -1845,7 +1845,7 @@
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
             uint32_t e3;
-            e3 = ldl_kernel(ptr + 8);
+            e3 = ldul_kernel(ptr + 8);
             load_seg_cache_raw_dt(&env->ldt, e1, e2);
             env->ldt.base |= (target_ulong)e3 << 32;
         } else
@@ -1885,8 +1885,8 @@
         if ((index + entry_limit) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
         type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
         if ((e2 & DESC_S_MASK) ||
             (type != 1 && type != 9))
@@ -1896,8 +1896,8 @@
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
             uint32_t e3, e4;
-            e3 = ldl_kernel(ptr + 8);
-            e4 = ldl_kernel(ptr + 12);
+            e3 = ldul_kernel(ptr + 8);
+            e4 = ldul_kernel(ptr + 12);
             if ((e4 >> DESC_TYPE_SHIFT) & 0xf)
                 raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
             load_seg_cache_raw_dt(&env->tr, e1, e2);
@@ -1943,8 +1943,8 @@
         if ((index + 7) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
 
         if (!(e2 & DESC_S_MASK))
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
@@ -2273,7 +2273,7 @@
                 PUSHL(ssp, sp, sp_mask, env->segs[R_SS].selector);
                 PUSHL(ssp, sp, sp_mask, ESP);
                 for(i = param_count - 1; i >= 0; i--) {
-                    val = ldl_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask));
+                    val = ldul_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask));
                     PUSHL(ssp, sp, sp_mask, val);
                 }
             } else {
@@ -3569,8 +3569,8 @@
 
     if (env->cr[4] & CR4_OSFXSR_MASK) {
         /* XXX: finish it */
-        env->mxcsr = ldl(ptr + 0x18);
-        //ldl(ptr + 0x1c);
+        env->mxcsr = ldul(ptr + 0x18);
+        //ldul(ptr + 0x1c);
         nb_xmm_regs = 8 << data64;
         addr = ptr + 0xa0;
         for(i = 0; i < nb_xmm_regs; i++) {
@@ -3867,6 +3867,7 @@
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -3879,6 +3880,21 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 #endif
 
 /* try to fill the TLB and return an exception if error. If retaddr is
@@ -4010,13 +4026,13 @@
     env->intercept_cr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_write));
     env->intercept_dr_read    = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_read));
     env->intercept_dr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_write));
-    env->intercept_exceptions = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions));
+    env->intercept_exceptions = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions));
 
     env->gdt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base));
-    env->gdt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit));
+    env->gdt.limit = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit));
 
     env->idt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base));
-    env->idt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit));
+    env->idt.limit = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit));
 
     /* clear exit_info_2 so we behave like the real hardware */
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 0);
@@ -4025,7 +4041,7 @@
     cpu_x86_update_cr4(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4)));
     cpu_x86_update_cr3(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3)));
     env->cr[2] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2));
-    int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
+    int_ctl = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
     if (int_ctl & V_INTR_MASKING_MASK) {
         env->cr[8] = int_ctl & V_TPR_MASK;
         if (env->eflags & IF_MASK)
@@ -4073,11 +4089,11 @@
     regs_to_env();
 
     /* maybe we need to inject an event */
-    event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+    event_inj = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
     if (event_inj & SVM_EVTINJ_VALID) {
         uint8_t vector = event_inj & SVM_EVTINJ_VEC_MASK;
         uint16_t valid_err = event_inj & SVM_EVTINJ_VALID_ERR;
-        uint32_t event_inj_err = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
+        uint32_t event_inj_err = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
         stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
 
         if (loglevel & CPU_LOG_TB_IN_ASM)
@@ -4309,7 +4325,7 @@
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3), env->cr[3]);
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4), env->cr[4]);
 
-    if ((int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl))) & V_INTR_MASKING_MASK) {
+    if ((int_ctl = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl))) & V_INTR_MASKING_MASK) {
         int_ctl &= ~V_TPR_MASK;
         int_ctl |= env->cr[8] & V_TPR_MASK;
         stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl), int_ctl);
@@ -4330,10 +4346,10 @@
     env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
 
     env->gdt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base));
-    env->gdt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit));
+    env->gdt.limit = ldul_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit));
 
     env->idt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base));
-    env->idt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit));
+    env->idt.limit = ldul_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit));
 
     cpu_x86_update_cr0(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0)) | CR0_PE_MASK);
     cpu_x86_update_cr4(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4)));
Index: target-i386/helper2.c
===================================================================
--- target-i386/helper2.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-i386/helper2.c	2007-10-15 15:41:10.000000000 +0000
@@ -772,7 +772,7 @@
         /* page directory entry */
         pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) &
             env->a20_mask;
-        pde = ldl_phys(pde_addr);
+        pde = ldul_phys(pde_addr);
         if (!(pde & PG_PRESENT_MASK)) {
             error_code = 0;
             goto do_fault;
@@ -810,7 +810,7 @@
             /* page directory entry */
             pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
                 env->a20_mask;
-            pte = ldl_phys(pte_addr);
+            pte = ldul_phys(pte_addr);
             if (!(pte & PG_PRESENT_MASK)) {
                 error_code = 0;
                 goto do_fault;
@@ -910,13 +910,13 @@
 
             pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
                 env->a20_mask;
-            pml4e = ldl_phys(pml4e_addr);
+            pml4e = ldul_phys(pml4e_addr);
             if (!(pml4e & PG_PRESENT_MASK))
                 return -1;
 
             pdpe_addr = ((pml4e & ~0xfff) + (((addr >> 30) & 0x1ff) << 3)) &
                 env->a20_mask;
-            pdpe = ldl_phys(pdpe_addr);
+            pdpe = ldul_phys(pdpe_addr);
             if (!(pdpe & PG_PRESENT_MASK))
                 return -1;
         } else
@@ -924,14 +924,14 @@
         {
             pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) &
                 env->a20_mask;
-            pdpe = ldl_phys(pdpe_addr);
+            pdpe = ldul_phys(pdpe_addr);
             if (!(pdpe & PG_PRESENT_MASK))
                 return -1;
         }
 
         pde_addr = ((pdpe & ~0xfff) + (((addr >> 21) & 0x1ff) << 3)) &
             env->a20_mask;
-        pde = ldl_phys(pde_addr);
+        pde = ldul_phys(pde_addr);
         if (!(pde & PG_PRESENT_MASK)) {
             return -1;
         }
@@ -944,7 +944,7 @@
             pte_addr = ((pde & ~0xfff) + (((addr >> 12) & 0x1ff) << 3)) &
                 env->a20_mask;
             page_size = 4096;
-            pte = ldl_phys(pte_addr);
+            pte = ldul_phys(pte_addr);
         }
     } else {
         if (!(env->cr[0] & CR0_PG_MASK)) {
@@ -953,7 +953,7 @@
         } else {
             /* page directory entry */
             pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask;
-            pde = ldl_phys(pde_addr);
+            pde = ldul_phys(pde_addr);
             if (!(pde & PG_PRESENT_MASK))
                 return -1;
             if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
@@ -962,7 +962,7 @@
             } else {
                 /* page directory entry */
                 pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask;
-                pte = ldl_phys(pte_addr);
+                pte = ldul_phys(pte_addr);
                 if (!(pte & PG_PRESENT_MASK))
                     return -1;
                 page_size = 4096;
Index: target-i386/op.c
===================================================================
--- target-i386/op.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-i386/op.c	2007-10-15 15:41:10.000000000 +0000
@@ -716,8 +716,8 @@
 void OPPROTO op_boundl(void)
 {
     int low, high, v;
-    low = ldl(A0);
-    high = ldl(A0 + 4);
+    low = ldul(A0);
+    high = ldul(A0 + 4);
     v = T0;
     if (v < low || v > high) {
         raise_exception(EXCP05_BOUND);
@@ -747,8 +747,6 @@
 
 /* multiple size ops */
 
-#define ldul ldl
-
 #define SHIFT 0
 #include "ops_template.h"
 #undef SHIFT
@@ -1688,7 +1686,7 @@
 void OPPROTO op_flds_FT0_A0(void)
 {
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = ldl(A0);
+    FP_CONVERT.i32 = ldul(A0);
     FT0 = FP_CONVERT.f;
 #else
     FT0 = ldfl(A0);
@@ -1715,7 +1713,7 @@
 
 void helper_fildl_FT0_A0(void)
 {
-    FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
+    FT0 = (CPU86_LDouble)((int32_t)ldul(A0));
 }
 
 void helper_fildll_FT0_A0(void)
@@ -1753,10 +1751,10 @@
 void OPPROTO op_fildl_FT0_A0(void)
 {
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = (int32_t) ldl(A0);
+    FP_CONVERT.i32 = (int32_t) ldul(A0);
     FT0 = (CPU86_LDouble)FP_CONVERT.i32;
 #else
-    FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
+    FT0 = (CPU86_LDouble)((int32_t)ldul(A0));
 #endif
 }
 
@@ -1778,7 +1776,7 @@
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = ldl(A0);
+    FP_CONVERT.i32 = ldul(A0);
     env->fpregs[new_fpstt].d = FP_CONVERT.f;
 #else
     env->fpregs[new_fpstt].d = ldfl(A0);
@@ -1822,7 +1820,7 @@
 {
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
-    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0));
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldul(A0));
     env->fpstt = new_fpstt;
     env->fptags[new_fpstt] = 0; /* validate stack entry */
 }
@@ -1872,10 +1870,10 @@
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = (int32_t) ldl(A0);
+    FP_CONVERT.i32 = (int32_t) ldul(A0);
     env->fpregs[new_fpstt].d = (CPU86_LDouble)FP_CONVERT.i32;
 #else
-    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0));
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldul(A0));
 #endif
     env->fpstt = new_fpstt;
     env->fptags[new_fpstt] = 0; /* validate stack entry */
Index: target-i386/ops_mem.h
===================================================================
--- target-i386/ops_mem.h.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-i386/ops_mem.h	2007-10-15 15:41:10.000000000 +0000
@@ -20,7 +20,7 @@
 
 void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T0_A0)(void)
 {
-    T0 = (uint32_t)glue(ldl, MEMSUFFIX)(A0);
+    T0 = (uint32_t)glue(ldul, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_ldub, MEMSUFFIX), _T1_A0)(void)
@@ -45,7 +45,7 @@
 
 void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T1_A0)(void)
 {
-    T1 = (uint32_t)glue(ldl, MEMSUFFIX)(A0);
+    T1 = (uint32_t)glue(ldul, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_stb, MEMSUFFIX), _T0_A0)(void)
@@ -122,12 +122,12 @@
 #ifdef TARGET_X86_64
 void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T0_A0)(void)
 {
-    T0 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
+    T0 = glue(ldsl, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T1_A0)(void)
 {
-    T1 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
+    T1 = glue(ldsl, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_ldq, MEMSUFFIX), _T0_A0)(void)
Index: target-i386/svm.h
===================================================================
--- target-i386/svm.h.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-i386/svm.h	2007-10-15 15:41:10.000000000 +0000
@@ -340,13 +340,13 @@
                     R_##seg_index, \
                     lduw_phys(addr + offsetof(struct vmcb, save.seg.selector)),\
                     ldq_phys(addr + offsetof(struct vmcb, save.seg.base)),\
-                    ldl_phys(addr + offsetof(struct vmcb, save.seg.limit)),\
-                    vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg.attrib)), ldq_phys(addr + offsetof(struct vmcb, save.seg.base)), ldl_phys(addr + offsetof(struct vmcb, save.seg.limit))))
+                    ldul_phys(addr + offsetof(struct vmcb, save.seg.limit)),\
+                    vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg.attrib)), ldq_phys(addr + offsetof(struct vmcb, save.seg.base)), ldul_phys(addr + offsetof(struct vmcb, save.seg.limit))))
 
 #define SVM_LOAD_SEG2(addr, seg_qemu, seg_vmcb) \
     env->seg_qemu.selector  = lduw_phys(addr + offsetof(struct vmcb, save.seg_vmcb.selector)); \
     env->seg_qemu.base      = ldq_phys(addr + offsetof(struct vmcb, save.seg_vmcb.base)); \
-    env->seg_qemu.limit     = ldl_phys(addr + offsetof(struct vmcb, save.seg_vmcb.limit)); \
+    env->seg_qemu.limit     = ldul_phys(addr + offsetof(struct vmcb, save.seg_vmcb.limit)); \
     env->seg_qemu.flags     = vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg_vmcb.attrib)), env->seg_qemu.base, env->seg_qemu.limit)
 
 #define SVM_SAVE_SEG(addr, seg_qemu, seg_vmcb) \
Index: target-i386/translate-copy.c
===================================================================
--- target-i386/translate-copy.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-i386/translate-copy.c	2007-10-15 15:41:10.000000000 +0000
@@ -207,7 +207,7 @@
         case 0:
             if (base == 5) {
                 base = -1;
-                disp = ldl_code(s->pc);
+                disp = ldul_code(s->pc);
                 s->pc += 4;
             } else {
                 disp = 0;
@@ -218,7 +218,7 @@
             break;
         default:
         case 2:
-            disp = ldl_code(s->pc);
+            disp = ldul_code(s->pc);
             s->pc += 4;
             break;
         }
@@ -266,7 +266,7 @@
         break;
     default:
     case OT_LONG:
-        ret = ldl_code(s->pc);
+        ret = ldul_code(s->pc);
         s->pc += 4;
         break;
     }
Index: target-i386/translate.c
===================================================================
--- target-i386/translate.c.orig	2007-10-14 16:44:20.000000000 +0000
+++ target-i386/translate.c	2007-10-15 15:41:10.000000000 +0000
@@ -1462,7 +1462,7 @@
         case 0:
             if ((base & 7) == 5) {
                 base = -1;
-                disp = (int32_t)ldl_code(s->pc);
+                disp = (int32_t)ldul_code(s->pc);
                 s->pc += 4;
                 if (CODE64(s) && !havesib) {
                     disp += s->pc + s->rip_offset;
@@ -1476,7 +1476,7 @@
             break;
         default:
         case 2:
-            disp = ldl_code(s->pc);
+            disp = ldul_code(s->pc);
             s->pc += 4;
             break;
         }
@@ -1736,7 +1736,7 @@
         break;
     default:
     case OT_LONG:
-        ret = ldl_code(s->pc);
+        ret = ldul_code(s->pc);
         s->pc += 4;
         break;
     }
Index: target-m68k/exec.h
===================================================================
--- target-m68k/exec.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-m68k/exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -42,6 +42,9 @@
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 void cpu_m68k_flush_flags(CPUM68KState *env, int cc_op);
Index: target-m68k/op_helper.c
===================================================================
--- target-m68k/op_helper.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-m68k/op_helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -33,6 +33,7 @@
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -45,6 +46,21 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
@@ -83,8 +99,8 @@
     uint32_t fmt;
 
     sp = env->aregs[7];
-    fmt = ldl_kernel(sp);
-    env->pc = ldl_kernel(sp + 4);
+    fmt = ldul_kernel(sp);
+    env->pc = ldul_kernel(sp + 4);
     sp |= (fmt >> 28) & 3;
     env->sr = fmt & 0xffff;
     m68k_switch_sp(env);
@@ -112,7 +128,7 @@
                     && (env->sr & SR_S) != 0
                     && (env->pc & 3) == 0
                     && lduw_code(env->pc - 4) == 0x4e71
-                    && ldl_code(env->pc) == 0x4e7bf000) {
+                    && ldul_code(env->pc) == 0x4e7bf000) {
                 env->pc += 4;
                 do_m68k_semihosting(env, env->dregs[0]);
                 return;
@@ -153,7 +169,7 @@
     stl_kernel(sp, fmt);
     env->aregs[7] = sp;
     /* Jump to vector.  */
-    env->pc = ldl_kernel(env->vbr + vector);
+    env->pc = ldul_kernel(env->vbr + vector);
 }
 
 #endif
Index: target-m68k/op_mem.h
===================================================================
--- target-m68k/op_mem.h.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-m68k/op_mem.h	2007-10-15 15:41:10.000000000 +0000
@@ -11,7 +11,7 @@
 MEM_LD_OP(8s32,sb)
 MEM_LD_OP(16u32,uw)
 MEM_LD_OP(16s32,sw)
-MEM_LD_OP(32,l)
+MEM_LD_OP(32,ul)
 
 #undef MEM_LD_OP
 
Index: target-mips/exec.h
===================================================================
--- target-mips/exec.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-mips/exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -54,6 +54,9 @@
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 #if defined(TARGET_MIPSN32) || defined(TARGET_MIPS64)
Index: target-mips/op_helper.c
===================================================================
--- target-mips/op_helper.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-mips/op_helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -544,6 +544,7 @@
 #define MMUSUFFIX _mmu
 #define ALIGNED_ONLY
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -556,6 +557,21 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 static void do_unaligned_access (target_ulong addr, int is_write, int is_user, void *retaddr)
 {
     env->CP0_BadVAddr = addr;
Index: target-mips/op_mem.c
===================================================================
--- target-mips/op_mem.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-mips/op_mem.c	2007-10-15 15:41:10.000000000 +0000
@@ -57,13 +57,13 @@
 
 void glue(op_lw, MEMSUFFIX) (void)
 {
-    T0 = glue(ldl, MEMSUFFIX)(T0);
+    T0 = glue(ldul, MEMSUFFIX)(T0);
     RETURN();
 }
 
 void glue(op_lwu, MEMSUFFIX) (void)
 {
-    T0 = (uint32_t)glue(ldl, MEMSUFFIX)(T0);
+    T0 = (uint32_t)glue(ldul, MEMSUFFIX)(T0);
     RETURN();
 }
 
@@ -167,7 +167,7 @@
 void glue(op_ll, MEMSUFFIX) (void)
 {
     T1 = T0;
-    T0 = glue(ldl, MEMSUFFIX)(T0);
+    T0 = glue(ldul, MEMSUFFIX)(T0);
     env->CP0_LLAddr = T1;
     RETURN();
 }
@@ -383,7 +383,7 @@
 
 void glue(op_lwc1, MEMSUFFIX) (void)
 {
-    WT0 = glue(ldl, MEMSUFFIX)(T0);
+    WT0 = glue(ldul, MEMSUFFIX)(T0);
     RETURN();
 }
 void glue(op_swc1, MEMSUFFIX) (void)
Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c.orig	2007-10-14 16:44:20.000000000 +0000
+++ target-mips/translate.c	2007-10-15 15:41:10.000000000 +0000
@@ -6544,7 +6544,7 @@
             gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK;
             gen_opc_instr_start[lj] = 1;
         }
-        ctx.opcode = ldl_code(ctx.pc);
+        ctx.opcode = ldul_code(ctx.pc);
         decode_opc(env, &ctx);
         ctx.pc += 4;
 
Index: target-ppc/exec.h
===================================================================
--- target-ppc/exec.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-ppc/exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -91,7 +91,12 @@
 #endif
 
 #if !defined(CONFIG_USER_ONLY)
+
+#include "softmmu_exec.h"
+#define REVERSE_ENDIAN
 #include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
+
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 void do_raise_exception_err (uint32_t exception, int error_code);
Index: target-ppc/helper.c
===================================================================
--- target-ppc/helper.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-ppc/helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -569,8 +569,8 @@
         } else
 #endif
         {
-            pte0 = ldl_phys(base + (i * 8));
-            pte1 =  ldl_phys(base + (i * 8) + 4);
+            pte0 = ldul_phys(base + (i * 8));
+            pte1 =  ldul_phys(base + (i * 8) + 4);
             r = pte32_check(ctx, pte0, pte1, h, rw, type);
 #if defined (DEBUG_MMU)
             if (loglevel != 0) {
@@ -686,7 +686,7 @@
     mask = 0x0000000000000000ULL; /* Avoid gcc warning */
     for (n = 0; n < env->slb_nr; n++) {
         tmp64 = ldq_phys(sr_base);
-        tmp = ldl_phys(sr_base + 8);
+        tmp = ldul_phys(sr_base + 8);
 #if defined(DEBUG_SLB)
         if (loglevel != 0) {
             fprintf(logfile, "%s: seg %d " PADDRX " %016" PRIx64 " %08"
@@ -784,7 +784,7 @@
     sr_base = env->spr[SPR_ASR];
     sr_base += 12 * slb_nr;
     tmp64 = ldq_phys(sr_base);
-    tmp = ldl_phys(sr_base + 8);
+    tmp = ldul_phys(sr_base + 8);
     if (tmp64 & 0x0000000008000000ULL) {
         /* SLB entry is valid */
         /* Copy SLB bits 62:88 to Rt 37:63 (VSID 23:49) */
@@ -990,10 +990,10 @@
                         sdr, mask + 0x80);
                 for (curaddr = sdr; curaddr < (sdr + mask + 0x80);
                      curaddr += 16) {
-                    a0 = ldl_phys(curaddr);
-                    a1 = ldl_phys(curaddr + 4);
-                    a2 = ldl_phys(curaddr + 8);
-                    a3 = ldl_phys(curaddr + 12);
+                    a0 = ldul_phys(curaddr);
+                    a1 = ldul_phys(curaddr + 4);
+                    a2 = ldul_phys(curaddr + 8);
+                    a3 = ldul_phys(curaddr + 12);
                     if (a0 != 0 || a1 != 0 || a2 != 0 || a3 != 0) {
                         fprintf(logfile,
                                 PADDRX ": %08x %08x %08x %08x\n",
@@ -2266,7 +2266,7 @@
 #endif
         /* XXX: this is false */
         /* Get rS/rD and rA from faulting opcode */
-        env->spr[SPR_DSISR] |= (ldl_code((env->nip - 4)) & 0x03FF0000) >> 16;
+        env->spr[SPR_DSISR] |= (ldul_code((env->nip - 4)) & 0x03FF0000) >> 16;
         goto store_current;
     case POWERPC_EXCP_PROGRAM:   /* Program exception                        */
         switch (env->error_code & ~0xF) {
Index: target-ppc/op_helper.c
===================================================================
--- target-ppc/op_helper.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-ppc/op_helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -2296,6 +2296,7 @@
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -2308,6 +2309,21 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
Index: target-ppc/op_helper.h
===================================================================
--- target-ppc/op_helper.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-ppc/op_helper.h	2007-10-15 15:41:10.000000000 +0000
@@ -37,19 +37,6 @@
 void glue(do_POWER2_stfq, MEMSUFFIX) (void);
 void glue(do_POWER2_stfq_le, MEMSUFFIX) (void);
 
-#if defined(TARGET_PPC64)
-void glue(do_lsw_64, MEMSUFFIX) (int dst);
-void glue(do_lsw_le_64, MEMSUFFIX) (int dst);
-void glue(do_stsw_64, MEMSUFFIX) (int src);
-void glue(do_stsw_le_64, MEMSUFFIX) (int src);
-void glue(do_lmw_64, MEMSUFFIX) (int dst);
-void glue(do_lmw_le_64, MEMSUFFIX) (int dst);
-void glue(do_stmw_64, MEMSUFFIX) (int src);
-void glue(do_stmw_le_64, MEMSUFFIX) (int src);
-void glue(do_icbi_64, MEMSUFFIX) (void);
-void glue(do_dcbz_64, MEMSUFFIX) (void);
-#endif
-
 #else
 
 void do_print_mem_EA (target_ulong EA);
Index: target-ppc/op_helper_mem.h
===================================================================
--- target-ppc/op_helper_mem.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-ppc/op_helper_mem.h	2007-10-15 15:41:10.000000000 +0000
@@ -19,229 +19,101 @@
  */
 
 /* Multiple word / string load and store */
-static always_inline target_ulong glue(ld32r, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF000000UL) >> 24) | ((tmp & 0x00FF0000UL) >> 8) |
-        ((tmp & 0x0000FF00UL) << 8) | ((tmp & 0x000000FFUL) << 24);
-}
-
-static always_inline void glue(st32r, MEMSUFFIX) (target_ulong EA,
-                                                  target_ulong data)
-{
-    uint32_t tmp =
-        ((data & 0xFF000000UL) >> 24) | ((data & 0x00FF0000UL) >> 8) |
-        ((data & 0x0000FF00UL) << 8) | ((data & 0x000000FFUL) << 24);
-    glue(stl, MEMSUFFIX)(EA, tmp);
-}
-
 void glue(do_lmw, MEMSUFFIX) (int dst)
 {
     for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lmw_64, MEMSUFFIX) (int dst)
-{
-    for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst] = glue(ldul, MEMSUFFIX)(T0);
     }
 }
-#endif
 
 void glue(do_stmw, MEMSUFFIX) (int src)
 {
     for (; src < 32; src++, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint32_t)T0, env->gpr[src]);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stmw_64, MEMSUFFIX) (int src)
-{
-    for (; src < 32; src++, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint64_t)T0, env->gpr[src]);
+        glue(stl, MEMSUFFIX)(T0, env->gpr[src]);
     }
 }
-#endif
 
 void glue(do_lmw_le, MEMSUFFIX) (int dst)
 {
     for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lmw_le_64, MEMSUFFIX) (int dst)
-{
-    for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst] = glue(ldulr, MEMSUFFIX)(T0);
     }
 }
-#endif
 
 void glue(do_stmw_le, MEMSUFFIX) (int src)
 {
     for (; src < 32; src++, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint32_t)T0, env->gpr[src]);
+        glue(stlr, MEMSUFFIX)(T0, env->gpr[src]);
     }
 }
 
-#if defined(TARGET_PPC64)
-void glue(do_stmw_le_64, MEMSUFFIX) (int src)
-{
-    for (; src < 32; src++, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint64_t)T0, env->gpr[src]);
-    }
-}
-#endif
-
 void glue(do_lsw, MEMSUFFIX) (int dst)
 {
     uint32_t tmp;
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ldl, MEMSUFFIX)((uint32_t)T0);
+        env->gpr[dst++] = glue(ldul, MEMSUFFIX)(T0);
         if (unlikely(dst == 32))
             dst = 0;
     }
     if (unlikely(T1 != 0)) {
         tmp = 0;
         for (sh = 24; T1 > 0; T1--, T0++, sh -= 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint32_t)T0) << sh;
+            tmp |= glue(ldub, MEMSUFFIX)(T0) << sh;
         }
         env->gpr[dst] = tmp;
     }
 }
 
-#if defined(TARGET_PPC64)
-void glue(do_lsw_64, MEMSUFFIX) (int dst)
-{
-    uint32_t tmp;
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ldl, MEMSUFFIX)((uint64_t)T0);
-        if (unlikely(dst == 32))
-            dst = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        tmp = 0;
-        for (sh = 24; T1 > 0; T1--, T0++, sh -= 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint64_t)T0) << sh;
-        }
-        env->gpr[dst] = tmp;
-    }
-}
-#endif
-
 void glue(do_stsw, MEMSUFFIX) (int src)
 {
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint32_t)T0, env->gpr[src++]);
+        glue(stl, MEMSUFFIX)(T0, env->gpr[src++]);
         if (unlikely(src == 32))
             src = 0;
     }
     if (unlikely(T1 != 0)) {
         for (sh = 24; T1 > 0; T1--, T0++, sh -= 8)
-            glue(stb, MEMSUFFIX)((uint32_t)T0, (env->gpr[src] >> sh) & 0xFF);
+            glue(stb, MEMSUFFIX)(T0, (env->gpr[src] >> sh) & 0xFF);
     }
 }
 
-#if defined(TARGET_PPC64)
-void glue(do_stsw_64, MEMSUFFIX) (int src)
-{
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint64_t)T0, env->gpr[src++]);
-        if (unlikely(src == 32))
-            src = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        for (sh = 24; T1 > 0; T1--, T0++, sh -= 8)
-            glue(stb, MEMSUFFIX)((uint64_t)T0, (env->gpr[src] >> sh) & 0xFF);
-    }
-}
-#endif
-
 void glue(do_lsw_le, MEMSUFFIX) (int dst)
 {
     uint32_t tmp;
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
+        env->gpr[dst++] = glue(ldulr, MEMSUFFIX)(T0);
         if (unlikely(dst == 32))
             dst = 0;
     }
     if (unlikely(T1 != 0)) {
         tmp = 0;
         for (sh = 0; T1 > 0; T1--, T0++, sh += 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint32_t)T0) << sh;
+            tmp |= glue(ldub, MEMSUFFIX)(T0) << sh;
         }
         env->gpr[dst] = tmp;
     }
 }
 
-#if defined(TARGET_PPC64)
-void glue(do_lsw_le_64, MEMSUFFIX) (int dst)
-{
-    uint32_t tmp;
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
-        if (unlikely(dst == 32))
-            dst = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        tmp = 0;
-        for (sh = 0; T1 > 0; T1--, T0++, sh += 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint64_t)T0) << sh;
-        }
-        env->gpr[dst] = tmp;
-    }
-}
-#endif
-
 void glue(do_stsw_le, MEMSUFFIX) (int src)
 {
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint32_t)T0, env->gpr[src++]);
-        if (unlikely(src == 32))
-            src = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        for (sh = 0; T1 > 0; T1--, T0++, sh += 8)
-            glue(stb, MEMSUFFIX)((uint32_t)T0, (env->gpr[src] >> sh) & 0xFF);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stsw_le_64, MEMSUFFIX) (int src)
-{
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint64_t)T0, env->gpr[src++]);
+        glue(stlr, MEMSUFFIX)(T0, env->gpr[src++]);
         if (unlikely(src == 32))
             src = 0;
     }
     if (unlikely(T1 != 0)) {
         for (sh = 0; T1 > 0; T1--, T0++, sh += 8)
-            glue(stb, MEMSUFFIX)((uint64_t)T0, (env->gpr[src] >> sh) & 0xFF);
+            glue(stb, MEMSUFFIX)(T0, (env->gpr[src] >> sh) & 0xFF);
     }
 }
-#endif
 
 /* Instruction cache invalidation helper */
 void glue(do_icbi, MEMSUFFIX) (void)
@@ -252,28 +124,12 @@
      * (not a fetch) by the MMU. To be sure it will be so,
      * do the load "by hand".
      */
-    tmp = glue(ldl, MEMSUFFIX)((uint32_t)T0);
+    tmp = glue(ldul, MEMSUFFIX)(T0);
     T0 &= ~(env->icache_line_size - 1);
-    tb_invalidate_page_range((uint32_t)T0,
-                             (uint32_t)(T0 + env->icache_line_size));
+    /* We assume the range does not wrap around 2^32 on 32-bit targets */
+    tb_invalidate_page_range(T0, T0 + env->icache_line_size);
 }
 
-#if defined(TARGET_PPC64)
-void glue(do_icbi_64, MEMSUFFIX) (void)
-{
-    uint64_t tmp;
-    /* Invalidate one cache line :
-     * PowerPC specification says this is to be treated like a load
-     * (not a fetch) by the MMU. To be sure it will be so,
-     * do the load "by hand".
-     */
-    tmp = glue(ldq, MEMSUFFIX)((uint64_t)T0);
-    T0 &= ~(env->icache_line_size - 1);
-    tb_invalidate_page_range((uint64_t)T0,
-                             (uint64_t)(T0 + env->icache_line_size));
-}
-#endif
-
 void glue(do_dcbz, MEMSUFFIX) (void)
 {
     int dcache_line_size = env->dcache_line_size;
@@ -281,91 +137,44 @@
     /* XXX: should be 970 specific (?) */
     if (((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1)
         dcache_line_size = 32;
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x00), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x04), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x08), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x0C), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x10), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x14), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x18), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x1C), 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x00, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x04, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x08, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x0C, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x10, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x14, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x18, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x1C, 0);
     if (dcache_line_size >= 64) {
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x20UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x24UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x28UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x2CUL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x30UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x34UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x38UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x3CUL), 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x20UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x24UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x28UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x2CUL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x30UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x34UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x38UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x3CUL, 0);
         if (dcache_line_size >= 128) {
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x40UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x44UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x48UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x4CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x50UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x54UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x58UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x5CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x60UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x64UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x68UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x6CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x70UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x74UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x78UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x7CUL), 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x40UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x44UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x48UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x4CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x50UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x54UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x58UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x5CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x60UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x64UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x68UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x6CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x70UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x74UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x78UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x7CUL, 0);
         }
     }
 }
 
-#if defined(TARGET_PPC64)
-void glue(do_dcbz_64, MEMSUFFIX) (void)
-{
-    int dcache_line_size = env->dcache_line_size;
-
-    /* XXX: should be 970 specific (?) */
-    if (((env->spr[SPR_970_HID5] >> 6) & 0x3) == 0x2)
-        dcache_line_size = 32;
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x00), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x04), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x08), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x0C), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x10), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x14), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x18), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x1C), 0);
-    if (dcache_line_size >= 64) {
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x20UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x24UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x28UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x2CUL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x30UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x34UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x38UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x3CUL), 0);
-        if (dcache_line_size >= 128) {
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x40UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x44UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x48UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x4CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x50UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x54UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x58UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x5CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x60UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x64UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x68UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x6CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x70UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x74UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x78UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x7CUL), 0);
-        }
-    }
-}
-#endif
-
 /* PowerPC 601 specific instructions (POWER bridge) */
 // XXX: to be tested
 void glue(do_POWER_lscbx, MEMSUFFIX) (int dest, int ra, int rb)
@@ -400,26 +209,6 @@
     FT1 = glue(ldfq, MEMSUFFIX)((uint32_t)(T0 + 4));
 }
 
-static always_inline double glue(ldfqr, MEMSUFFIX) (target_ulong EA)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = glue(ldfq, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-
-    return u.d;
-}
-
 void glue(do_POWER2_lfq_le, MEMSUFFIX) (void)
 {
     FT0 = glue(ldfqr, MEMSUFFIX)((uint32_t)(T0 + 4));
@@ -432,25 +221,6 @@
     glue(stfq, MEMSUFFIX)((uint32_t)(T0 + 4), FT1);
 }
 
-static always_inline void glue(stfqr, MEMSUFFIX) (target_ulong EA, double d)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = d;
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-    glue(stfq, MEMSUFFIX)(EA, u.d);
-}
-
 void glue(do_POWER2_stfq_le, MEMSUFFIX) (void)
 {
     glue(stfqr, MEMSUFFIX)((uint32_t)(T0 + 4), FT0);
Index: target-ppc/op_mem.h
===================================================================
--- target-ppc/op_mem.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-ppc/op_mem.h	2007-10-15 15:41:10.000000000 +0000
@@ -18,85 +18,6 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-static always_inline uint16_t glue(ld16r, MEMSUFFIX) (target_ulong EA)
-{
-    uint16_t tmp = glue(lduw, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF00) >> 8) | ((tmp & 0x00FF) << 8);
-}
-
-static always_inline int32_t glue(ld16rs, MEMSUFFIX) (target_ulong EA)
-{
-    int16_t tmp = glue(lduw, MEMSUFFIX)(EA);
-    return (int16_t)((tmp & 0xFF00) >> 8) | ((tmp & 0x00FF) << 8);
-}
-
-static always_inline uint32_t glue(ld32r, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF000000) >> 24) | ((tmp & 0x00FF0000) >> 8) |
-        ((tmp & 0x0000FF00) << 8) | ((tmp & 0x000000FF) << 24);
-}
-
-#if defined(TARGET_PPC64) || defined(TARGET_PPCEMB)
-static always_inline uint64_t glue(ld64r, MEMSUFFIX) (target_ulong EA)
-{
-    uint64_t tmp = glue(ldq, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF00000000000000ULL) >> 56) |
-        ((tmp & 0x00FF000000000000ULL) >> 40) |
-        ((tmp & 0x0000FF0000000000ULL) >> 24) |
-        ((tmp & 0x000000FF00000000ULL) >> 8) |
-        ((tmp & 0x00000000FF000000ULL) << 8) |
-        ((tmp & 0x0000000000FF0000ULL) << 24) |
-        ((tmp & 0x000000000000FF00ULL) << 40) |
-        ((tmp & 0x00000000000000FFULL) << 54);
-}
-#endif
-
-#if defined(TARGET_PPC64)
-static always_inline int64_t glue(ldsl, MEMSUFFIX) (target_ulong EA)
-{
-    return (int32_t)glue(ldl, MEMSUFFIX)(EA);
-}
-
-static always_inline int64_t glue(ld32rs, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return (int32_t)((tmp & 0xFF000000) >> 24) | ((tmp & 0x00FF0000) >> 8) |
-        ((tmp & 0x0000FF00) << 8) | ((tmp & 0x000000FF) << 24);
-}
-#endif
-
-static always_inline void glue(st16r, MEMSUFFIX) (target_ulong EA,
-                                                  uint16_t data)
-{
-    uint16_t tmp = ((data & 0xFF00) >> 8) | ((data & 0x00FF) << 8);
-    glue(stw, MEMSUFFIX)(EA, tmp);
-}
-
-static always_inline void glue(st32r, MEMSUFFIX) (target_ulong EA,
-                                                  uint32_t data)
-{
-    uint32_t tmp = ((data & 0xFF000000) >> 24) | ((data & 0x00FF0000) >> 8) |
-        ((data & 0x0000FF00) << 8) | ((data & 0x000000FF) << 24);
-    glue(stl, MEMSUFFIX)(EA, tmp);
-}
-
-#if defined(TARGET_PPC64) || defined(TARGET_PPCEMB)
-static always_inline void glue(st64r, MEMSUFFIX) (target_ulong EA,
-                                                  uint64_t data)
-{
-    uint64_t tmp = ((data & 0xFF00000000000000ULL) >> 56) |
-        ((data & 0x00FF000000000000ULL) >> 40) |
-        ((data & 0x0000FF0000000000ULL) >> 24) |
-        ((data & 0x000000FF00000000ULL) >> 8) |
-        ((data & 0x00000000FF000000ULL) << 8) |
-        ((data & 0x0000000000FF0000ULL) << 24) |
-        ((data & 0x000000000000FF00ULL) << 40) |
-        ((data & 0x00000000000000FFULL) << 56);
-    glue(stq, MEMSUFFIX)(EA, tmp);
-}
-#endif
-
 /***                             Integer load                              ***/
 #define PPC_LD_OP(name, op)                                                   \
 void OPPROTO glue(glue(op_l, name), MEMSUFFIX) (void)                         \
@@ -130,10 +51,11 @@
 }
 #endif
 
+/* Native-endian fixed-point memory loads                                    */
 PPC_LD_OP(bz, ldub);
 PPC_LD_OP(ha, ldsw);
 PPC_LD_OP(hz, lduw);
-PPC_LD_OP(wz, ldl);
+PPC_LD_OP(wz, ldul);
 #if defined(TARGET_PPC64)
 PPC_LD_OP(d, ldq);
 PPC_LD_OP(wa, ldsl);
@@ -142,23 +64,24 @@
 PPC_LD_OP_64(bz, ldub);
 PPC_LD_OP_64(ha, ldsw);
 PPC_LD_OP_64(hz, lduw);
-PPC_LD_OP_64(wz, ldl);
+PPC_LD_OP_64(wz, ldul);
 #endif
 
-PPC_LD_OP(ha_le, ld16rs);
-PPC_LD_OP(hz_le, ld16r);
-PPC_LD_OP(wz_le, ld32r);
+/* Reverse-endian fixed-point memory loads                                   */
+PPC_LD_OP(ha_le, ldswr);
+PPC_LD_OP(hz_le, lduwr);
+PPC_LD_OP(wz_le, ldulr);
 #if defined(TARGET_PPC64)
-PPC_LD_OP(d_le, ld64r);
-PPC_LD_OP(wa_le, ld32rs);
-PPC_LD_OP_64(d_le, ld64r);
-PPC_LD_OP_64(wa_le, ld32rs);
-PPC_LD_OP_64(ha_le, ld16rs);
-PPC_LD_OP_64(hz_le, ld16r);
-PPC_LD_OP_64(wz_le, ld32r);
+PPC_LD_OP(d_le, ldqr);
+PPC_LD_OP(wa_le, ldslr);
+PPC_LD_OP_64(d_le, ldqr);
+PPC_LD_OP_64(wa_le, ldslr);
+PPC_LD_OP_64(ha_le, ldswr);
+PPC_LD_OP_64(hz_le, lduwr);
+PPC_LD_OP_64(wz_le, ldulr);
 #endif
 
-/***                              Integer store                            ***/
+/* Native-endian fixed-point memory stores                                   */
 PPC_ST_OP(b, stb);
 PPC_ST_OP(h, stw);
 PPC_ST_OP(w, stl);
@@ -170,120 +93,110 @@
 PPC_ST_OP_64(w, stl);
 #endif
 
-PPC_ST_OP(h_le, st16r);
-PPC_ST_OP(w_le, st32r);
+/* Reverse-endian fixed-point memory stores                                  */
+PPC_ST_OP(h_le, stwr);
+PPC_ST_OP(w_le, stlr);
+#if defined(TARGET_PPC64)
+PPC_ST_OP(d_le, stqr);
+PPC_ST_OP_64(d_le, stqr);
+PPC_ST_OP_64(h_le, stwr);
+PPC_ST_OP_64(w_le, stlr);
+#endif
+
+/* Native-endian fixed-point loads and stores with byte-reverse              */
+PPC_LD_OP(hbr, lduwr);
+PPC_LD_OP(wbr, ldulr);
+PPC_ST_OP(hbr, stwr);
+PPC_ST_OP(wbr, stlr);
 #if defined(TARGET_PPC64)
-PPC_ST_OP(d_le, st64r);
-PPC_ST_OP_64(d_le, st64r);
-PPC_ST_OP_64(h_le, st16r);
-PPC_ST_OP_64(w_le, st32r);
-#endif
-
-/***                Integer load and store with byte reverse               ***/
-PPC_LD_OP(hbr, ld16r);
-PPC_LD_OP(wbr, ld32r);
-PPC_ST_OP(hbr, st16r);
-PPC_ST_OP(wbr, st32r);
-#if defined(TARGET_PPC64)
-PPC_LD_OP_64(hbr, ld16r);
-PPC_LD_OP_64(wbr, ld32r);
-PPC_ST_OP_64(hbr, st16r);
-PPC_ST_OP_64(wbr, st32r);
+PPC_LD_OP_64(hbr, lduwr);
+PPC_LD_OP_64(wbr, ldulr);
+PPC_ST_OP_64(hbr, stwr);
+PPC_ST_OP_64(wbr, stlr);
 #endif
 
+/* Reverse-endian fixed-point loads and stores with byte-reverse             */
 PPC_LD_OP(hbr_le, lduw);
-PPC_LD_OP(wbr_le, ldl);
+PPC_LD_OP(wbr_le, ldul);
 PPC_ST_OP(hbr_le, stw);
 PPC_ST_OP(wbr_le, stl);
 #if defined(TARGET_PPC64)
 PPC_LD_OP_64(hbr_le, lduw);
-PPC_LD_OP_64(wbr_le, ldl);
+PPC_LD_OP_64(wbr_le, ldul);
 PPC_ST_OP_64(hbr_le, stw);
 PPC_ST_OP_64(wbr_le, stl);
 #endif
 
-/***                    Integer load and store multiple                    ***/
+/* Native-endian fixed-point loads and stores multiple                       */
 void OPPROTO glue(op_lmw, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_lmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_lmw_64, MEMSUFFIX) (void)
-{
-    glue(do_lmw_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
-void OPPROTO glue(op_lmw_le, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw, MEMSUFFIX) (void)
 {
-    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_stmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_lmw_le_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_lmw_64, MEMSUFFIX) (void)
 {
-    glue(do_lmw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_lmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
-#endif
 
-void OPPROTO glue(op_stmw, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw_64, MEMSUFFIX) (void)
 {
     glue(do_stmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
+#endif
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_stmw_64, MEMSUFFIX) (void)
+/* Reverse-endian fixed-point loads and stores multiple                      */
+void OPPROTO glue(op_lmw_le, MEMSUFFIX) (void)
 {
-    glue(do_stmw_64, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
-#endif
 
 void OPPROTO glue(op_stmw_le, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_stmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_stmw_le_64, MEMSUFFIX) (void)
-{
-    glue(do_stmw_le_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
-/***                    Integer load and store strings                     ***/
-void OPPROTO glue(op_lswi, MEMSUFFIX) (void)
+void OPPROTO glue(op_lmw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw, MEMSUFFIX)(PARAM1);
+    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_lswi_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw_64, MEMSUFFIX)(PARAM1);
+    glue(do_stmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
 
-void OPPROTO glue(op_lswi_le, MEMSUFFIX) (void)
+/* Native-endian loads and stores string                                     */
+void OPPROTO glue(op_lswi, MEMSUFFIX) (void)
 {
-    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0; /* truncate address to 32 bits, as op_lmw does */
+    glue(do_lsw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_lswi_le_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_lswi_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_lsw, MEMSUFFIX)(PARAM1); /* _64 op: T0 is used untruncated */
     RETURN();
 }
 #endif
@@ -303,6 +216,7 @@
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
+            T0 = (uint32_t)T0;
             glue(do_lsw, MEMSUFFIX)(PARAM1);
         }
     }
@@ -320,13 +234,44 @@
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
-            glue(do_lsw_64, MEMSUFFIX)(PARAM1);
+            glue(do_lsw, MEMSUFFIX)(PARAM1);
         }
     }
     RETURN();
 }
 #endif
 
+void OPPROTO glue(op_stsw, MEMSUFFIX) (void)
+{
+    T0 = (uint32_t)T0;
+    glue(do_stsw, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+
+#if defined(TARGET_PPC64)
+void OPPROTO glue(op_stsw_64, MEMSUFFIX) (void)
+{
+    glue(do_stsw, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+#endif
+
+/* Reverse-endian loads and stores string                                    */
+void OPPROTO glue(op_lswi_le, MEMSUFFIX) (void)
+{
+    T0 = (uint32_t)T0;
+    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+
+#if defined(TARGET_PPC64)
+void OPPROTO glue(op_lswi_le_64, MEMSUFFIX) (void)
+{
+    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+#endif
+
 void OPPROTO glue(op_lswx_le, MEMSUFFIX) (void)
 {
     /* Note: T1 comes from xer_bc then no cast is needed */
@@ -337,6 +282,7 @@
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
+            T0 = (uint32_t)T0;
             glue(do_lsw_le, MEMSUFFIX)(PARAM1);
         }
     }
@@ -354,29 +300,16 @@
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
-            glue(do_lsw_le_64, MEMSUFFIX)(PARAM1);
+            glue(do_lsw_le, MEMSUFFIX)(PARAM1);
         }
     }
     RETURN();
 }
 #endif
 
-void OPPROTO glue(op_stsw, MEMSUFFIX) (void)
-{
-    glue(do_stsw, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_stsw_64, MEMSUFFIX) (void)
-{
-    glue(do_stsw_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
 void OPPROTO glue(op_stsw_le, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_stsw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
@@ -384,7 +317,7 @@
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_stsw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_stsw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_stsw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
@@ -432,38 +365,9 @@
 PPC_STF_OP_64(fiwx, stfiwx);
 #endif
 
-static always_inline void glue(stfqr, MEMSUFFIX) (target_ulong EA, double d)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = d;
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-    glue(stfq, MEMSUFFIX)(EA, u.d);
-}
-
 static always_inline void glue(stfsr, MEMSUFFIX) (target_ulong EA, double d)
 {
-    union {
-        float f;
-        uint32_t u;
-    } u;
-
-    u.f = float64_to_float32(d, &env->fp_status);
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-    glue(stfl, MEMSUFFIX)(EA, u.f);
+    glue(stflr, MEMSUFFIX)(EA, float64_to_float32(d, &env->fp_status));
 }
 
 static always_inline void glue(stfiwxr, MEMSUFFIX) (target_ulong EA, double d)
@@ -475,11 +379,7 @@
 
     /* Store the low order 32 bits without any conversion */
     u.d = d;
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-    glue(stl, MEMSUFFIX)(EA, u.u);
+    glue(stlr, MEMSUFFIX)(EA, u.u);
 }
 
 PPC_STF_OP(fd_le, stfqr);
@@ -520,40 +420,9 @@
 PPC_LDF_OP_64(fs, ldfs);
 #endif
 
-static always_inline double glue(ldfqr, MEMSUFFIX) (target_ulong EA)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = glue(ldfq, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-
-    return u.d;
-}
-
 static always_inline double glue(ldfsr, MEMSUFFIX) (target_ulong EA)
 {
-    union {
-        float f;
-        uint32_t u;
-    } u;
-
-    u.f = glue(ldfl, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-
-    return float32_to_float64(u.f, &env->fp_status);
+    return float32_to_float64(glue(ldflr, MEMSUFFIX)(EA), &env->fp_status);
 }
 
 PPC_LDF_OP(fd_le, ldfqr);
@@ -569,7 +438,7 @@
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ldl, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldul, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -581,7 +450,7 @@
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldul, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -615,7 +484,7 @@
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldulr, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -627,7 +496,7 @@
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldulr, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -638,7 +507,7 @@
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld64r, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldqr, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -649,7 +518,7 @@
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld64r, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldqr, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -731,7 +600,7 @@
         if (unlikely(env->reserve != (uint32_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st32r, MEMSUFFIX)((uint32_t)T0, T1);
+            glue(stlr, MEMSUFFIX)((uint32_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -748,7 +617,7 @@
         if (unlikely(env->reserve != (uint64_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st32r, MEMSUFFIX)((uint64_t)T0, T1);
+            glue(stlr, MEMSUFFIX)((uint64_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -764,7 +633,7 @@
         if (unlikely(env->reserve != (uint32_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st64r, MEMSUFFIX)((uint32_t)T0, T1);
+            glue(stqr, MEMSUFFIX)((uint32_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -780,7 +649,7 @@
         if (unlikely(env->reserve != (uint64_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st64r, MEMSUFFIX)((uint64_t)T0, T1);
+            glue(stqr, MEMSUFFIX)((uint64_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -862,6 +731,7 @@
 
 void OPPROTO glue(op_dcbz, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_dcbz, MEMSUFFIX)();
     RETURN();
 }
@@ -940,7 +810,7 @@
 
 void OPPROTO glue(op_dcbz_64, MEMSUFFIX) (void)
 {
-    glue(do_dcbz_64, MEMSUFFIX)();
+    glue(do_dcbz, MEMSUFFIX)();
     RETURN();
 }
 #endif
@@ -948,6 +818,7 @@
 /* Instruction cache block invalidate */
 void OPPROTO glue(op_icbi, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_icbi, MEMSUFFIX)();
     RETURN();
 }
@@ -955,7 +826,7 @@
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_icbi_64, MEMSUFFIX) (void)
 {
-    glue(do_icbi_64, MEMSUFFIX)();
+    glue(do_icbi, MEMSUFFIX)();
     RETURN();
 }
 #endif
@@ -963,14 +834,14 @@
 /* External access */
 void OPPROTO glue(op_eciwx, MEMSUFFIX) (void)
 {
-    T1 = glue(ldl, MEMSUFFIX)((uint32_t)T0);
+    T1 = glue(ldul, MEMSUFFIX)((uint32_t)T0);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_eciwx_64, MEMSUFFIX) (void)
 {
-    T1 = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+    T1 = glue(ldul, MEMSUFFIX)((uint64_t)T0);
     RETURN();
 }
 #endif
@@ -991,28 +862,28 @@
 
 void OPPROTO glue(op_eciwx_le, MEMSUFFIX) (void)
 {
-    T1 = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
+    T1 = glue(ldulr, MEMSUFFIX)((uint32_t)T0);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_eciwx_le_64, MEMSUFFIX) (void)
 {
-    T1 = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+    T1 = glue(ldulr, MEMSUFFIX)((uint64_t)T0);
     RETURN();
 }
 #endif
 
 void OPPROTO glue(op_ecowx_le, MEMSUFFIX) (void)
 {
-    glue(st32r, MEMSUFFIX)((uint32_t)T0, T1);
+    glue(stlr, MEMSUFFIX)((uint32_t)T0, T1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_ecowx_le_64, MEMSUFFIX) (void)
 {
-    glue(st32r, MEMSUFFIX)((uint64_t)T0, T1);
+    glue(stlr, MEMSUFFIX)((uint64_t)T0, T1);
     RETURN();
 }
 #endif
@@ -1070,8 +941,8 @@
 
 void OPPROTO glue(op_vr_lvx_le, MEMSUFFIX) (void)
 {
-    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint32_t)T0);
-    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint32_t)T0 + 8);
+    AVR0.u64[VR_DWORD1] = glue(ldqr, MEMSUFFIX)((uint32_t)T0);
+    AVR0.u64[VR_DWORD0] = glue(ldqr, MEMSUFFIX)((uint32_t)T0 + 8);
 }
 
 void OPPROTO glue(op_vr_stvx, MEMSUFFIX) (void)
@@ -1082,8 +953,8 @@
 
 void OPPROTO glue(op_vr_stvx_le, MEMSUFFIX) (void)
 {
-    glue(stq, MEMSUFFIX)((uint32_t)T0, AVR0.u64[VR_DWORD1]);
-    glue(stq, MEMSUFFIX)((uint32_t)T0 + 8, AVR0.u64[VR_DWORD0]);
+    glue(stqr, MEMSUFFIX)((uint32_t)T0, AVR0.u64[VR_DWORD1]);
+    glue(stqr, MEMSUFFIX)((uint32_t)T0 + 8, AVR0.u64[VR_DWORD0]);
 }
 
 #if defined(TARGET_PPC64)
@@ -1095,8 +966,8 @@
 
 void OPPROTO glue(op_vr_lvx_le_64, MEMSUFFIX) (void)
 {
-    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint64_t)T0);
-    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint64_t)T0 + 8);
+    AVR0.u64[VR_DWORD1] = glue(ldqr, MEMSUFFIX)((uint64_t)T0);
+    AVR0.u64[VR_DWORD0] = glue(ldqr, MEMSUFFIX)((uint64_t)T0 + 8);
 }
 
 void OPPROTO glue(op_vr_stvx_64, MEMSUFFIX) (void)
@@ -1107,8 +978,8 @@
 
 void OPPROTO glue(op_vr_stvx_le_64, MEMSUFFIX) (void)
 {
-    glue(stq, MEMSUFFIX)((uint64_t)T0, AVR0.u64[VR_DWORD1]);
-    glue(stq, MEMSUFFIX)((uint64_t)T0 + 8, AVR0.u64[VR_DWORD0]);
+    glue(stqr, MEMSUFFIX)((uint64_t)T0, AVR0.u64[VR_DWORD1]);
+    glue(stqr, MEMSUFFIX)((uint64_t)T0 + 8, AVR0.u64[VR_DWORD0]);
 }
 #endif
 #undef VR_DWORD0
@@ -1163,14 +1034,14 @@
 #if !defined(TARGET_PPC64)
 PPC_SPE_LD_OP(dd, ldq);
 PPC_SPE_ST_OP(dd, stq);
-PPC_SPE_LD_OP(dd_le, ld64r);
-PPC_SPE_ST_OP(dd_le, st64r);
+PPC_SPE_LD_OP(dd_le, ldqr);
+PPC_SPE_ST_OP(dd_le, stqr);
 #endif
 static always_inline uint64_t glue(spe_ldw, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ldl, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ldl, MEMSUFFIX)(EA + 4);
+    ret = (uint64_t)glue(ldul, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(ldul, MEMSUFFIX)(EA + 4);
     return ret;
 }
 PPC_SPE_LD_OP(dw, spe_ldw);
@@ -1184,16 +1055,16 @@
 static always_inline uint64_t glue(spe_ldw_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld32r, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ld32r, MEMSUFFIX)(EA + 4);
+    ret = (uint64_t)glue(ldulr, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(ldulr, MEMSUFFIX)(EA + 4);
     return ret;
 }
 PPC_SPE_LD_OP(dw_le, spe_ldw_le);
 static always_inline void glue(spe_stdw_le, MEMSUFFIX) (target_ulong EA,
                                                         uint64_t data)
 {
-    glue(st32r, MEMSUFFIX)(EA, data >> 32);
-    glue(st32r, MEMSUFFIX)(EA + 4, data);
+    glue(stlr, MEMSUFFIX)(EA, data >> 32);
+    glue(stlr, MEMSUFFIX)(EA + 4, data);
 }
 PPC_SPE_ST_OP(dw_le, spe_stdw_le);
 static always_inline uint64_t glue(spe_ldh, MEMSUFFIX) (target_ulong EA)
@@ -1218,20 +1089,20 @@
 static always_inline uint64_t glue(spe_ldh_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 48;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2) << 32;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 4) << 16;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 6);
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 48;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2) << 32;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 4) << 16;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 6);
     return ret;
 }
 PPC_SPE_LD_OP(dh_le, spe_ldh_le);
 static always_inline void glue(spe_stdh_le, MEMSUFFIX) (target_ulong EA,
                                                         uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 48);
-    glue(st16r, MEMSUFFIX)(EA + 2, data >> 32);
-    glue(st16r, MEMSUFFIX)(EA + 4, data >> 16);
-    glue(st16r, MEMSUFFIX)(EA + 6, data);
+    glue(stwr, MEMSUFFIX)(EA, data >> 48);
+    glue(stwr, MEMSUFFIX)(EA + 2, data >> 32);
+    glue(stwr, MEMSUFFIX)(EA + 4, data >> 16);
+    glue(stwr, MEMSUFFIX)(EA + 6, data);
 }
 PPC_SPE_ST_OP(dh_le, spe_stdh_le);
 static always_inline uint64_t glue(spe_lwhe, MEMSUFFIX) (target_ulong EA)
@@ -1252,16 +1123,16 @@
 static always_inline uint64_t glue(spe_lwhe_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 48;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2) << 16;
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 48;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2) << 16;
     return ret;
 }
 PPC_SPE_LD_OP(whe_le, spe_lwhe_le);
 static always_inline void glue(spe_stwhe_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 48);
-    glue(st16r, MEMSUFFIX)(EA + 2, data >> 16);
+    glue(stwr, MEMSUFFIX)(EA, data >> 48);
+    glue(stwr, MEMSUFFIX)(EA + 2, data >> 16);
 }
 PPC_SPE_ST_OP(whe_le, spe_stwhe_le);
 static always_inline uint64_t glue(spe_lwhou, MEMSUFFIX) (target_ulong EA)
@@ -1290,24 +1161,24 @@
 static always_inline uint64_t glue(spe_lwhou_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2);
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2);
     return ret;
 }
 PPC_SPE_LD_OP(whou_le, spe_lwhou_le);
 static always_inline uint64_t glue(spe_lwhos_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = ((uint64_t)((int32_t)glue(ld16rs, MEMSUFFIX)(EA))) << 32;
-    ret |= (uint64_t)((int32_t)glue(ld16rs, MEMSUFFIX)(EA + 2));
+    ret = ((uint64_t)((int32_t)glue(ldswr, MEMSUFFIX)(EA))) << 32;
+    ret |= (uint64_t)((int32_t)glue(ldswr, MEMSUFFIX)(EA + 2));
     return ret;
 }
 PPC_SPE_LD_OP(whos_le, spe_lwhos_le);
 static always_inline void glue(spe_stwho_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 32);
-    glue(st16r, MEMSUFFIX)(EA + 2, data);
+    glue(stwr, MEMSUFFIX)(EA, data >> 32);
+    glue(stwr, MEMSUFFIX)(EA + 2, data);
 }
 PPC_SPE_ST_OP(who_le, spe_stwho_le);
 #if !defined(TARGET_PPC64)
@@ -1320,7 +1191,7 @@
 static always_inline void glue(spe_stwwo_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st32r, MEMSUFFIX)(EA, data);
+    glue(stlr, MEMSUFFIX)(EA, data);
 }
 PPC_SPE_ST_OP(wwo_le, spe_stwwo_le);
 #endif
@@ -1334,14 +1205,14 @@
 static always_inline uint64_t glue(spe_lh_le, MEMSUFFIX) (target_ulong EA)
 {
     uint16_t tmp;
-    tmp = glue(ld16r, MEMSUFFIX)(EA);
+    tmp = glue(lduwr, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 48) | ((uint64_t)tmp << 16);
 }
 PPC_SPE_LD_OP(h_le, spe_lh_le);
 static always_inline uint64_t glue(spe_lwwsplat, MEMSUFFIX) (target_ulong EA)
 {
     uint32_t tmp;
-    tmp = glue(ldl, MEMSUFFIX)(EA);
+    tmp = glue(ldul, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 32) | (uint64_t)tmp;
 }
 PPC_SPE_LD_OP(wwsplat, spe_lwwsplat);
@@ -1349,7 +1220,7 @@
 uint64_t glue(spe_lwwsplat_le, MEMSUFFIX) (target_ulong EA)
 {
     uint32_t tmp;
-    tmp = glue(ld32r, MEMSUFFIX)(EA);
+    tmp = glue(ldulr, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 32) | (uint64_t)tmp;
 }
 PPC_SPE_LD_OP(wwsplat_le, spe_lwwsplat_le);
@@ -1369,9 +1240,9 @@
 {
     uint64_t ret;
     uint16_t tmp;
-    tmp = glue(ld16r, MEMSUFFIX)(EA);
+    tmp = glue(lduwr, MEMSUFFIX)(EA);
     ret = ((uint64_t)tmp << 48) | ((uint64_t)tmp << 32);
-    tmp = glue(ld16r, MEMSUFFIX)(EA + 2);
+    tmp = glue(lduwr, MEMSUFFIX)(EA + 2);
     ret |= ((uint64_t)tmp << 16) | (uint64_t)tmp;
     return ret;
 }
Index: target-ppc/translate.c
===================================================================
--- target-ppc/translate.c.orig	2007-10-14 16:44:20.000000000 +0000
+++ target-ppc/translate.c	2007-10-15 15:41:10.000000000 +0000
@@ -6756,7 +6756,7 @@
                     ctx.nip, 1 - msr_pr, msr_ir);
         }
 #endif
-        ctx.opcode = ldl_code(ctx.nip);
+        ctx.opcode = ldul_code(ctx.nip);
         if (msr_le) {
             ctx.opcode = ((ctx.opcode & 0xFF000000) >> 24) |
                 ((ctx.opcode & 0x00FF0000) >> 8) |
Index: target-sh4/exec.h
===================================================================
--- target-sh4/exec.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-sh4/exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -48,6 +48,9 @@
 
 #ifndef CONFIG_USER_ONLY
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 #define RETURN() __asm__ __volatile__("")
Index: target-sh4/op_helper.c
===================================================================
--- target-sh4/op_helper.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-sh4/op_helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -30,6 +30,7 @@
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -42,6 +43,21 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 void tlb_fill(target_ulong addr, int is_write, int mmu_idx, void *retaddr)
 {
     TranslationBlock *tb;
Index: target-sh4/op_mem.c
===================================================================
--- target-sh4/op_mem.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-sh4/op_mem.c	2007-10-15 15:41:10.000000000 +0000
@@ -48,7 +48,7 @@
 }
 
 void glue(op_ldl_T0_T0, MEMSUFFIX) (void) {
-    T0 = glue(ldl, MEMSUFFIX) (T0);
+    T0 = glue(ldul, MEMSUFFIX) (T0);
     RETURN();
 }
 
Index: target-sparc/exec.h
===================================================================
--- target-sparc/exec.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-sparc/exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -100,6 +100,9 @@
 /* XXX: move that to a generic header */
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 static inline void env_to_regs(void)
Index: target-sparc/helper.c
===================================================================
--- target-sparc/helper.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-sparc/helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -130,7 +130,7 @@
     /* SPARC reference MMU table walk: Context table->L1->L2->PTE */
     /* Context base + context number */
     pde_ptr = (env->mmuregs[1] << 4) + (env->mmuregs[2] << 2);
-    pde = ldl_phys(pde_ptr);
+    pde = ldul_phys(pde_ptr);
 
     /* Ctx pde */
     switch (pde & PTE_ENTRYTYPE_MASK) {
@@ -142,7 +142,7 @@
         return 4 << 2;
     case 1: /* L0 PDE */
         pde_ptr = ((address >> 22) & ~3) + ((pde & ~3) << 4);
-        pde = ldl_phys(pde_ptr);
+        pde = ldul_phys(pde_ptr);
 
         switch (pde & PTE_ENTRYTYPE_MASK) {
         default:
@@ -152,7 +152,7 @@
             return (1 << 8) | (4 << 2);
         case 1: /* L1 PDE */
             pde_ptr = ((address & 0xfc0000) >> 16) + ((pde & ~3) << 4);
-            pde = ldl_phys(pde_ptr);
+            pde = ldul_phys(pde_ptr);
 
             switch (pde & PTE_ENTRYTYPE_MASK) {
             default:
@@ -162,7 +162,7 @@
                 return (2 << 8) | (4 << 2);
             case 1: /* L2 PDE */
                 pde_ptr = ((address & 0x3f000) >> 10) + ((pde & ~3) << 4);
-                pde = ldl_phys(pde_ptr);
+                pde = ldul_phys(pde_ptr);
 
                 switch (pde & PTE_ENTRYTYPE_MASK) {
                 default:
@@ -266,7 +266,7 @@
     /* Context base + context number */
     pde_ptr = (target_phys_addr_t)(env->mmuregs[1] << 4) +
         (env->mmuregs[2] << 2);
-    pde = ldl_phys(pde_ptr);
+    pde = ldul_phys(pde_ptr);
 
     switch (pde & PTE_ENTRYTYPE_MASK) {
     default:
@@ -278,7 +278,7 @@
         if (mmulev == 3)
             return pde;
         pde_ptr = ((address >> 22) & ~3) + ((pde & ~3) << 4);
-        pde = ldl_phys(pde_ptr);
+        pde = ldul_phys(pde_ptr);
 
         switch (pde & PTE_ENTRYTYPE_MASK) {
         default:
@@ -291,7 +291,7 @@
             if (mmulev == 2)
                 return pde;
             pde_ptr = ((address & 0xfc0000) >> 16) + ((pde & ~3) << 4);
-            pde = ldl_phys(pde_ptr);
+            pde = ldul_phys(pde_ptr);
 
             switch (pde & PTE_ENTRYTYPE_MASK) {
             default:
@@ -304,7 +304,7 @@
                 if (mmulev == 1)
                     return pde;
                 pde_ptr = ((address & 0x3f000) >> 10) + ((pde & ~3) << 4);
-                pde = ldl_phys(pde_ptr);
+                pde = ldul_phys(pde_ptr);
 
                 switch (pde & PTE_ENTRYTYPE_MASK) {
                 default:
@@ -331,7 +331,7 @@
 
     printf("MMU dump:\n");
     pde_ptr = (env->mmuregs[1] << 4) + (env->mmuregs[2] << 2);
-    pde = ldl_phys(pde_ptr);
+    pde = ldul_phys(pde_ptr);
     printf("Root ptr: " TARGET_FMT_plx ", ctx: %d\n",
            (target_phys_addr_t)env->mmuregs[1] << 4, env->mmuregs[2]);
     for (n = 0, va = 0; n < 256; n++, va += 16 * 1024 * 1024) {
Index: target-sparc/op_helper.c
===================================================================
--- target-sparc/op_helper.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-sparc/op_helper.c	2007-10-15 15:42:33.000000000 +0000
@@ -241,11 +241,11 @@
             break;
         default:
         case 4:
-            ret = ldl_code(T0 & ~3);
+            ret = ldul_code(T0 & ~3);
             break;
         case 8:
-            ret = ldl_code(T0 & ~3);
-            T0 = ldl_code((T0 + 4) & ~3);
+            ret = ldul_code(T0 & ~3);
+            T0 = ldul_code((T0 + 4) & ~3);
             break;
         }
         break;
@@ -259,11 +259,11 @@
             break;
         default:
         case 4:
-            ret = ldl_user(T0 & ~3);
+            ret = ldul_user(T0 & ~3);
             break;
         case 8:
-            ret = ldl_user(T0 & ~3);
-            T0 = ldl_user((T0 + 4) & ~3);
+            ret = ldul_user(T0 & ~3);
+            T0 = ldul_user((T0 + 4) & ~3);
             break;
         }
         break;
@@ -277,11 +277,11 @@
             break;
         default:
         case 4:
-            ret = ldl_kernel(T0 & ~3);
+            ret = ldul_kernel(T0 & ~3);
             break;
         case 8:
-            ret = ldl_kernel(T0 & ~3);
-            T0 = ldl_kernel((T0 + 4) & ~3);
+            ret = ldul_kernel(T0 & ~3);
+            T0 = ldul_kernel((T0 + 4) & ~3);
             break;
         }
         break;
@@ -300,11 +300,11 @@
             break;
         default:
         case 4:
-            ret = ldl_phys(T0 & ~3);
+            ret = ldul_phys(T0 & ~3);
             break;
         case 8:
-            ret = ldl_phys(T0 & ~3);
-            T0 = ldl_phys((T0 + 4) & ~3);
+            ret = ldul_phys(T0 & ~3);
+            T0 = ldul_phys((T0 + 4) & ~3);
             break;
         }
         break;
@@ -321,13 +321,13 @@
             break;
         default:
         case 4:
-            ret = ldl_phys((target_phys_addr_t)(T0 & ~3)
-                           | ((target_phys_addr_t)(asi & 0xf) << 32));
+            ret = ldul_phys((target_phys_addr_t)(T0 & ~3)
+                            | ((target_phys_addr_t)(asi & 0xf) << 32));
             break;
         case 8:
-            ret = ldl_phys((target_phys_addr_t)(T0 & ~3)
-                           | ((target_phys_addr_t)(asi & 0xf) << 32));
-            T0 = ldl_phys((target_phys_addr_t)((T0 + 4) & ~3)
+            ret = ldul_phys((target_phys_addr_t)(T0 & ~3)
+                            | ((target_phys_addr_t)(asi & 0xf) << 32));
+            T0 = ldul_phys((target_phys_addr_t)((T0 + 4) & ~3)
                            | ((target_phys_addr_t)(asi & 0xf) << 32));
             break;
         }
@@ -557,7 +557,7 @@
             uint32_t src = T1 & ~3, dst = T0 & ~3, temp;
 
             for (i = 0; i < 32; i += 4, src += 4, dst += 4) {
-                temp = ldl_kernel(src);
+                temp = ldul_kernel(src);
                 stl_kernel(dst, temp);
             }
         }
@@ -660,7 +660,7 @@
                 ret = lduw_raw(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_raw(T0 & ~3);
+                ret = ldul_raw(T0 & ~3);
                 break;
             default:
             case 8:
@@ -810,7 +810,7 @@
                     ret = lduw_hypv(T0 & ~1);
                     break;
                 case 4:
-                    ret = ldl_hypv(T0 & ~3);
+                    ret = ldul_hypv(T0 & ~3);
                     break;
                 default:
                 case 8:
@@ -826,7 +826,7 @@
                     ret = lduw_kernel(T0 & ~1);
                     break;
                 case 4:
-                    ret = ldl_kernel(T0 & ~3);
+                    ret = ldul_kernel(T0 & ~3);
                     break;
                 default:
                 case 8:
@@ -843,7 +843,7 @@
                 ret = lduw_user(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_user(T0 & ~3);
+                ret = ldul_user(T0 & ~3);
                 break;
             default:
             case 8:
@@ -865,7 +865,7 @@
                 ret = lduw_phys(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_phys(T0 & ~3);
+                ret = ldul_phys(T0 & ~3);
                 break;
             default:
             case 8:
@@ -1670,6 +1670,21 @@
 #define ALIGNED_ONLY
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -1681,6 +1696,7 @@
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 static void do_unaligned_access(target_ulong addr, int is_write, int is_user,
                                 void *retaddr)
Index: target-sparc/op_mem.h
===================================================================
--- target-sparc/op_mem.h.orig	2007-10-14 20:29:26.000000000 +0000
+++ target-sparc/op_mem.h	2007-10-15 15:59:17.000000000 +0000
@@ -23,7 +23,7 @@
     glue(op, MEMSUFFIX)(ADDR(T0), T1);                                      \
 }
 
-SPARC_LD_OP(ld, ldl);
+SPARC_LD_OP(ld, ldul);
 SPARC_LD_OP(ldub, ldub);
 SPARC_LD_OP(lduh, lduw);
 SPARC_LD_OP_S(ldsb, ldsb);
@@ -48,15 +48,15 @@
 
 void OPPROTO glue(op_swap, MEMSUFFIX)(void)
 {
-    target_ulong tmp = glue(ldl, MEMSUFFIX)(ADDR(T0));
+    target_ulong tmp = glue(ldul, MEMSUFFIX)(ADDR(T0));
     glue(stl, MEMSUFFIX)(ADDR(T0), T1);       /* XXX: Should be Atomically */
     T1 = tmp;
 }
 
 void OPPROTO glue(op_ldd, MEMSUFFIX)(void)
 {
-    T1 = glue(ldl, MEMSUFFIX)(ADDR(T0));
-    T0 = glue(ldl, MEMSUFFIX)((ADDR(T0 + 4)));
+    T1 = glue(ldul, MEMSUFFIX)(ADDR(T0));
+    T0 = glue(ldul, MEMSUFFIX)((ADDR(T0 + 4)));
 }
 
 /***                         Floating-point store                          ***/
@@ -84,12 +84,12 @@
 #ifdef TARGET_SPARC64
 void OPPROTO glue(op_lduw, MEMSUFFIX)(void)
 {
-    T1 = (uint64_t)(glue(ldl, MEMSUFFIX)(ADDR(T0)) & 0xffffffff);
+    T1 = (uint64_t)(glue(ldul, MEMSUFFIX)(ADDR(T0)) & 0xffffffff);
 }
 
 void OPPROTO glue(op_ldsw, MEMSUFFIX)(void)
 {
-    T1 = (int64_t)(glue(ldl, MEMSUFFIX)(ADDR(T0)) & 0xffffffff);
+    T1 = (int64_t)(glue(ldul, MEMSUFFIX)(ADDR(T0)) & 0xffffffff);
 }
 
 SPARC_LD_OP(ldx, ldq);
Index: target-sparc/translate.c
===================================================================
--- target-sparc/translate.c.orig	2007-10-14 16:47:11.000000000 +0000
+++ target-sparc/translate.c	2007-10-15 15:41:10.000000000 +0000
@@ -1096,7 +1096,7 @@
 {
     unsigned int insn, opc, rs1, rs2, rd;
 
-    insn = ldl_code(dc->pc);
+    insn = ldul_code(dc->pc);
     opc = GET_FIELD(insn, 0, 1);
 
     rd = GET_FIELD(insn, 2, 6);

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] RFC: reverse-endian softmmu memory accessors
  2007-10-14 12:59 ` Blue Swirl
@ 2007-10-15 12:10   ` J. Mayer
  2007-10-15 16:02     ` Blue Swirl
  0 siblings, 1 reply; 20+ messages in thread
From: J. Mayer @ 2007-10-15 12:10 UTC (permalink / raw)
  To: qemu-devel

On Sun, 2007-10-14 at 15:59 +0300, Blue Swirl wrote:
> On 10/14/07, J. Mayer <l_indien@magic.fr> wrote:
> > Here's an updated version of the patch against current CVS.
> > This patches provides reverse-endian, little-endian and big-endian
> > memory accessors, available with and without softmmu. It also provides
> > an IO_MEM_REVERSE TLB flag to allow future support of per-page
> > endianness control, which is required by some targets CPU emulations.
> > Having reverse-endian memory accessors also make it possible to optimise
> > reverse-endian memory access when the target CPU has dedicated
> > instructions. For now, it includes optimisations for the PowerPC target.
> 
> This breaks Sparc32 softmmu, I get a black screen. Your changes to
> target-sparc and hw/sun4m.c look fine, so the problem could be in IO?

Did it work before my commits? I may have done something wrong during
the merge...
I will do more checks and more tests...

-- 
J. Mayer <l_indien@magic.fr>
Never organized

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Qemu-devel] RFC: reverse-endian softmmu memory accessors
  2007-10-14 11:49 J. Mayer
@ 2007-10-14 12:59 ` Blue Swirl
  2007-10-15 12:10   ` J. Mayer
  0 siblings, 1 reply; 20+ messages in thread
From: Blue Swirl @ 2007-10-14 12:59 UTC (permalink / raw)
  To: qemu-devel

On 10/14/07, J. Mayer <l_indien@magic.fr> wrote:
> Here's an updated version of the patch against current CVS.
> This patches provides reverse-endian, little-endian and big-endian
> memory accessors, available with and without softmmu. It also provides
> an IO_MEM_REVERSE TLB flag to allow future support of per-page
> endianness control, which is required by some targets CPU emulations.
> Having reverse-endian memory accessors also make it possible to optimise
> reverse-endian memory access when the target CPU has dedicated
> instructions. For now, it includes optimisations for the PowerPC target.

This breaks Sparc32 softmmu, I get a black screen. Your changes to
target-sparc and hw/sun4m.c look fine, so the problem could be in IO?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Qemu-devel] RFC: reverse-endian softmmu memory accessors
@ 2007-10-14 11:49 J. Mayer
  2007-10-14 12:59 ` Blue Swirl
  0 siblings, 1 reply; 20+ messages in thread
From: J. Mayer @ 2007-10-14 11:49 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 590 bytes --]

Here's an updated version of the patch against current CVS.
This patch provides reverse-endian, little-endian and big-endian
memory accessors, available with and without softmmu. It also provides
an IO_MEM_REVERSE TLB flag to allow future support of per-page
endianness control, which is required by some target CPU emulations.
Having reverse-endian memory accessors also makes it possible to optimise
reverse-endian memory accesses when the target CPU has dedicated
instructions. For now, it includes optimisations for the PowerPC target.

-- 
J. Mayer <l_indien@magic.fr>
Never organized

[-- Attachment #2: softmmu_reverse_endian.diff --]
[-- Type: text/x-patch, Size: 181655 bytes --]

Index: cpu-all.h
===================================================================
RCS file: /sources/qemu/qemu/cpu-all.h,v
retrieving revision 1.76
diff -u -d -d -p -r1.76 cpu-all.h
--- cpu-all.h	23 Sep 2007 15:28:03 -0000	1.76
+++ cpu-all.h	14 Oct 2007 11:36:51 -0000
@@ -161,9 +161,9 @@ typedef union {
  *
  * endian is:
  * (empty): target cpu endianness or 8 bit access
- *   r    : reversed target cpu endianness (not implemented yet)
- *   be   : big endian (not implemented yet)
- *   le   : little endian (not implemented yet)
+ *   r    : reversed target cpu endianness
+ *   be   : big endian
+ *   le   : little endian
  *
  * access_type is:
  *   raw    : host memory access
@@ -215,7 +215,32 @@ static inline int ldsw_le_p(void *ptr)
 #endif
 }
 
-static inline int ldl_le_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_le_p(void *ptr)
+{
+#ifdef __powerpc__
+    int val;
+    __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+    return (uint32_t)val;
+#else
+    uint8_t *p = ptr;
+    return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
+#endif
+}
+
+static inline int64_t ldsl_le_p(void *ptr)
+{
+#ifdef __powerpc__
+    int val;
+    __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+    return (int32_t)val;
+#else
+    uint8_t *p = ptr;
+    return (int32_t)(p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24));
+#endif
+}
+#else
+static inline int ldul_le_p(void *ptr)
 {
 #ifdef __powerpc__
     int val;
@@ -226,13 +251,14 @@ static inline int ldl_le_p(void *ptr)
     return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
 #endif
 }
+#endif
 
 static inline uint64_t ldq_le_p(void *ptr)
 {
     uint8_t *p = ptr;
     uint32_t v1, v2;
-    v1 = ldl_le_p(p);
-    v2 = ldl_le_p(p + 4);
+    v1 = ldul_le_p(p);
+    v2 = ldul_le_p(p + 4);
     return v1 | ((uint64_t)v2 << 32);
 }
 
@@ -275,7 +301,7 @@ static inline float32 ldfl_le_p(void *pt
         float32 f;
         uint32_t i;
     } u;
-    u.i = ldl_le_p(ptr);
+    u.i = ldul_le_p(ptr);
     return u.f;
 }
 
@@ -292,8 +318,8 @@ static inline void stfl_le_p(void *ptr, 
 static inline float64 ldfq_le_p(void *ptr)
 {
     CPU_DoubleU u;
-    u.l.lower = ldl_le_p(ptr);
-    u.l.upper = ldl_le_p(ptr + 4);
+    u.l.lower = ldul_le_p(ptr);
+    u.l.upper = ldul_le_p(ptr + 4);
     return u.d;
 }
 
@@ -317,10 +343,22 @@ static inline int ldsw_le_p(void *ptr)
     return *(int16_t *)ptr;
 }
 
-static inline int ldl_le_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_le_p(void *ptr)
+{
+    return *(uint32_t *)ptr;
+}
+
+static inline int64_t ldsl_le_p(void *ptr)
+{
+    return *(int32_t *)ptr;
+}
+#else
+static inline int ldul_le_p(void *ptr)
 {
     return *(uint32_t *)ptr;
 }
+#endif
 
 static inline uint64_t ldq_le_p(void *ptr)
 {
@@ -397,7 +435,38 @@ static inline int ldsw_be_p(void *ptr)
 #endif
 }
 
-static inline int ldl_be_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_be_p(void *ptr)
+{
+#if defined(__i386__) || defined(__x86_64__)
+    int val;
+    asm volatile ("movl %1, %0\n"
+                  "bswap %0\n"
+                  : "=r" (val)
+                  : "m" (*(uint32_t *)ptr));
+    return (uint32_t)val;
+#else
+    uint8_t *b = (uint8_t *) ptr;
+    return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
+#endif
+}
+
+static inline int64_t ldsl_be_p(void *ptr)
+{
+#if defined(__i386__) || defined(__x86_64__)
+    int val;
+    asm volatile ("movl %1, %0\n"
+                  "bswap %0\n"
+                  : "=r" (val)
+                  : "m" (*(uint32_t *)ptr));
+    return (int32_t)val;
+#else
+    uint8_t *b = (uint8_t *) ptr;
+    return (int32_t)((b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]);
+#endif
+}
+#else
+static inline int ldul_be_p(void *ptr)
 {
 #if defined(__i386__) || defined(__x86_64__)
     int val;
@@ -411,12 +480,13 @@ static inline int ldl_be_p(void *ptr)
     return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
 #endif
 }
+#endif
 
 static inline uint64_t ldq_be_p(void *ptr)
 {
     uint32_t a,b;
-    a = ldl_be_p(ptr);
-    b = ldl_be_p(ptr+4);
+    a = ldul_be_p(ptr);
+    b = ldul_be_p(ptr+4);
     return (((uint64_t)a<<32)|b);
 }
 
@@ -464,7 +534,7 @@ static inline float32 ldfl_be_p(void *pt
         float32 f;
         uint32_t i;
     } u;
-    u.i = ldl_be_p(ptr);
+    u.i = ldul_be_p(ptr);
     return u.f;
 }
 
@@ -481,8 +551,8 @@ static inline void stfl_be_p(void *ptr, 
 static inline float64 ldfq_be_p(void *ptr)
 {
     CPU_DoubleU u;
-    u.l.upper = ldl_be_p(ptr);
-    u.l.lower = ldl_be_p(ptr + 4);
+    u.l.upper = ldul_be_p(ptr);
+    u.l.lower = ldul_be_p(ptr + 4);
     return u.d;
 }
 
@@ -506,10 +576,22 @@ static inline int ldsw_be_p(void *ptr)
     return *(int16_t *)ptr;
 }
 
-static inline int ldl_be_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_be_p(void *ptr)
+{
+    return *(uint32_t *)ptr;
+}
+
+static inline int64_t ldsl_be_p(void *ptr)
+{
+    return *(int32_t *)ptr;
+}
+#else
+static inline int ldul_be_p(void *ptr)
 {
     return *(uint32_t *)ptr;
 }
+#endif
 
 static inline uint64_t ldq_be_p(void *ptr)
 {
@@ -557,9 +639,13 @@ static inline void stfq_be_p(void *ptr, 
 
 /* target CPU memory access functions */
 #if defined(TARGET_WORDS_BIGENDIAN)
+/* native-endian */
 #define lduw_p(p) lduw_be_p(p)
 #define ldsw_p(p) ldsw_be_p(p)
-#define ldl_p(p) ldl_be_p(p)
+#define ldul_p(p) ldul_be_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_p(p) ldsl_be_p(p)
+#endif
 #define ldq_p(p) ldq_be_p(p)
 #define ldfl_p(p) ldfl_be_p(p)
 #define ldfq_p(p) ldfq_be_p(p)
@@ -568,10 +654,29 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_p(p, v) stq_be_p(p, v)
 #define stfl_p(p, v) stfl_be_p(p, v)
 #define stfq_p(p, v) stfq_be_p(p, v)
+/* reverse-endian */
+#define lduwr_p(p) lduw_le_p(p)
+#define ldswr_p(p) ldsw_le_p(p)
+#define ldulr_p(p) ldul_le_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_p(p) ldsl_le_p(p)
+#endif
+#define ldqr_p(p) ldq_le_p(p)
+#define ldflr_p(p) ldfl_le_p(p)
+#define ldfqr_p(p) ldfq_le_p(p)
+#define stwr_p(p, v) stw_le_p(p, v)
+#define stlr_p(p, v) stl_le_p(p, v)
+#define stqr_p(p, v) stq_le_p(p, v)
+#define stflr_p(p, v) stfl_le_p(p, v)
+#define stfqr_p(p, v) stfq_le_p(p, v)
 #else
+/* native-endian */
 #define lduw_p(p) lduw_le_p(p)
 #define ldsw_p(p) ldsw_le_p(p)
-#define ldl_p(p) ldl_le_p(p)
+#define ldul_p(p) ldul_le_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_p(p) ldsl_le_p(p)
+#endif
 #define ldq_p(p) ldq_le_p(p)
 #define ldfl_p(p) ldfl_le_p(p)
 #define ldfq_p(p) ldfq_le_p(p)
@@ -580,6 +685,21 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_p(p, v) stq_le_p(p, v)
 #define stfl_p(p, v) stfl_le_p(p, v)
 #define stfq_p(p, v) stfq_le_p(p, v)
+/* reverse-endian */
+#define lduwr_p(p) lduw_be_p(p)
+#define ldswr_p(p) ldsw_be_p(p)
+#define ldulr_p(p) ldul_be_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_p(p) ldsl_be_p(p)
+#endif
+#define ldqr_p(p) ldq_be_p(p)
+#define ldflr_p(p) ldfl_be_p(p)
+#define ldfqr_p(p) ldfq_be_p(p)
+#define stwr_p(p, v) stw_be_p(p, v)
+#define stlr_p(p, v) stl_be_p(p, v)
+#define stqr_p(p, v) stq_be_p(p, v)
+#define stflr_p(p, v) stfl_be_p(p, v)
+#define stfqr_p(p, v) stfq_be_p(p, v)
 #endif
 
 /* MMU memory access macros */
@@ -605,11 +725,15 @@ static inline void stfq_be_p(void *ptr, 
 #define laddr(x) (uint8_t *)(long)(x)
 #endif
 
+/* native-endian */
 #define ldub_raw(p) ldub_p(laddr((p)))
 #define ldsb_raw(p) ldsb_p(laddr((p)))
 #define lduw_raw(p) lduw_p(laddr((p)))
 #define ldsw_raw(p) ldsw_p(laddr((p)))
-#define ldl_raw(p) ldl_p(laddr((p)))
+#define ldul_raw(p) ldul_p(laddr((p)))
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_raw(p) ldsl_p(laddr((p)))
+#endif
 #define ldq_raw(p) ldq_p(laddr((p)))
 #define ldfl_raw(p) ldfl_p(laddr((p)))
 #define ldfq_raw(p) ldfq_p(laddr((p)))
@@ -619,16 +743,112 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_raw(p, v) stq_p(saddr((p)), v)
 #define stfl_raw(p, v) stfl_p(saddr((p)), v)
 #define stfq_raw(p, v) stfq_p(saddr((p)), v)
-
+/* reverse-endian */
+#define ldubr_raw(p) ldub_p(laddr((p)))
+#define ldsbr_raw(p) ldsb_p(laddr((p)))
+#define lduwr_raw(p) lduwr_p(laddr((p)))
+#define ldswr_raw(p) ldswr_p(laddr((p)))
+#define ldulr_raw(p) ldulr_p(laddr((p)))
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_raw(p) ldslr_p(laddr((p)))
+#endif
+#define ldqr_raw(p) ldqr_p(laddr((p)))
+#define ldflr_raw(p) ldflr_p(laddr((p)))
+#define ldfqr_raw(p) ldfqr_p(laddr((p)))
+#define stbr_raw(p, v) stb_p(saddr((p)), v)
+#define stwr_raw(p, v) stwr_p(saddr((p)), v)
+#define stlr_raw(p, v) stlr_p(saddr((p)), v)
+#define stqr_raw(p, v) stqr_p(saddr((p)), v)
+#define stflr_raw(p, v) stflr_p(saddr((p)), v)
+#define stfqr_raw(p, v) stfqr_p(saddr((p)), v)
+#if defined(TARGET_WORDS_BIGENDIAN)
+/* big-endian */
+#define ldub_be_raw(p) ldub_raw(p)
+#define ldsb_be_raw(p) ldsb_raw(p)
+#define lduw_be_raw(p) lduw_raw(p)
+#define ldsw_be_raw(p) ldsw_raw(p)
+#define ldul_be_raw(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_raw(p) ldsl_raw(p)
+#endif
+#define ldq_be_raw(p) ldq_raw(p)
+#define ldfl_be_raw(p) ldfl_raw(p)
+#define ldfq_be_raw(p) ldfq_raw(p)
+#define stb_be_raw(p, v) stb_raw(p, v)
+#define stw_be_raw(p, v) stw_raw(p, v)
+#define stl_be_raw(p, v) stl_raw(p, v)
+#define stq_be_raw(p, v) stq_raw(p, v)
+#define stfl_be_raw(p, v) stfl_raw(p, v)
+#define stfq_be_raw(p, v) stfq_raw(p, v)
+/* little-endian */
+#define ldub_le_raw(p) ldubr_raw(p)
+#define ldsb_le_raw(p) ldsbr_raw(p)
+#define lduw_le_raw(p) lduwr_raw(p)
+#define ldsw_le_raw(p) ldswr_raw(p)
+#define ldul_le_raw(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_raw(p) ldslr_raw(p)
+#endif
+#define ldq_le_raw(p) ldqr_raw(p)
+#define ldfl_le_raw(p) ldflr_raw(p)
+#define ldfq_le_raw(p) ldfqr_raw(p)
+#define stb_le_raw(p, v) stbr_raw(p, v)
+#define stw_le_raw(p, v) stwr_raw(p, v)
+#define stl_le_raw(p, v) stlr_raw(p, v)
+#define stq_le_raw(p, v) stqr_raw(p, v)
+#define stfl_le_raw(p, v) stflr_raw(p, v)
+#define stfq_le_raw(p, v) stfqr_raw(p, v)
+#else
+/* big-endian */
+#define ldub_be_raw(p) ldubr_raw(p)
+#define ldsb_be_raw(p) ldsbr_raw(p)
+#define lduw_be_raw(p) lduwr_raw(p)
+#define ldsw_be_raw(p) ldswr_raw(p)
+#define ldul_be_raw(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_raw(p) ldslr_raw(p)
+#endif
+#define ldq_be_raw(p) ldqr_raw(p)
+#define ldfl_be_raw(p) ldflr_raw(p)
+#define ldfq_be_raw(p) ldfqr_raw(p)
+#define stb_be_raw(p, v) stbr_raw(p, v)
+#define stw_be_raw(p, v) stwr_raw(p, v)
+#define stl_be_raw(p, v) stlr_raw(p, v)
+#define stq_be_raw(p, v) stqr_raw(p, v)
+#define stfl_be_raw(p, v) stflr_raw(p, v)
+#define stfq_be_raw(p, v) stfqr_raw(p, v)
+/* little-endian */
+#define ldub_le_raw(p) ldub_raw(p)
+#define ldsb_le_raw(p) ldsb_raw(p)
+#define lduw_le_raw(p) lduw_raw(p)
+#define ldsw_le_raw(p) ldsw_raw(p)
+#define ldul_le_raw(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_raw(p) ldsl_raw(p)
+#endif
+#define ldq_le_raw(p) ldq_raw(p)
+#define ldfl_le_raw(p) ldfl_raw(p)
+#define ldfq_le_raw(p) ldfq_raw(p)
+#define stb_le_raw(p, v) stb_raw(p, v)
+#define stw_le_raw(p, v) stw_raw(p, v)
+#define stl_le_raw(p, v) stl_raw(p, v)
+#define stq_le_raw(p, v) stq_raw(p, v)
+#define stfl_le_raw(p, v) stfl_raw(p, v)
+#define stfq_le_raw(p, v) stfq_raw(p, v)
+#endif
 
 #if defined(CONFIG_USER_ONLY)
 
 /* if user mode, no other memory access functions */
+/* native-endian */
 #define ldub(p) ldub_raw(p)
 #define ldsb(p) ldsb_raw(p)
 #define lduw(p) lduw_raw(p)
 #define ldsw(p) ldsw_raw(p)
-#define ldl(p) ldl_raw(p)
+#define ldul(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl(p) ldsl_raw(p)
+#endif
 #define ldq(p) ldq_raw(p)
 #define ldfl(p) ldfl_raw(p)
 #define ldfq(p) ldfq_raw(p)
@@ -638,19 +858,173 @@ static inline void stfq_be_p(void *ptr, 
 #define stq(p, v) stq_raw(p, v)
 #define stfl(p, v) stfl_raw(p, v)
 #define stfq(p, v) stfq_raw(p, v)
+/* reverse-endian */
+#define ldubr(p) ldub_raw(p)
+#define ldsbr(p) ldsb_raw(p)
+#define lduwr(p) lduwr_raw(p)
+#define ldswr(p) ldswr_raw(p)
+#define ldulr(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr(p) ldslr_raw(p)
+#endif
+#define ldqr(p) ldqr_raw(p)
+#define ldflr(p) ldflr_raw(p)
+#define ldfqr(p) ldfqr_raw(p)
+#define stbr(p, v) stb_raw(p, v)
+#define stwr(p, v) stwr_raw(p, v)
+#define stlr(p, v) stlr_raw(p, v)
+#define stqr(p, v) stqr_raw(p, v)
+#define stflr(p, v) stflr_raw(p, v)
+#define stfqr(p, v) stfqr_raw(p, v)
+#if defined(TARGET_WORDS_BIGENDIAN)
+/* big-endian */
+#define ldub_be(p) ldub(p)
+#define ldsb_be(p) ldsb(p)
+#define lduw_be(p) lduw(p)
+#define ldsw_be(p) ldsw(p)
+#define ldul_be(p) ldul(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be(p) ldsl(p)
+#endif
+#define ldq_be(p) ldq(p)
+#define ldfl_be(p) ldfl(p)
+#define ldfq_be(p) ldfq(p)
+#define stb_be(p, v) stb(p, v)
+#define stw_be(p, v) stw(p, v)
+#define stl_be(p, v) stl(p, v)
+#define stq_be(p, v) stq(p, v)
+#define stfl_be(p, v) stfl(p, v)
+#define stfq_be(p, v) stfq(p, v)
+/* little-endian */
+#define ldub_le(p) ldubr(p)
+#define ldsb_le(p) ldsbr(p)
+#define lduw_le(p) lduwr(p)
+#define ldsw_le(p) ldswr(p)
+#define ldul_le(p) ldulr(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le(p) ldslr(p)
+#endif
+#define ldq_le(p) ldqr(p)
+#define ldfl_le(p) ldflr(p)
+#define ldfq_le(p) ldfqr(p)
+#define stb_le(p, v) stbr(p, v)
+#define stw_le(p, v) stwr(p, v)
+#define stl_le(p, v) stlr(p, v)
+#define stq_le(p, v) stqr(p, v)
+#define stfl_le(p, v) stflr(p, v)
+#define stfq_le(p, v) stfqr(p, v)
+#else
+/* big-endian */
+#define ldub_be(p) ldubr(p)
+#define ldsb_be(p) ldsbr(p)
+#define lduw_be(p) lduwr(p)
+#define ldsw_be(p) ldswr(p)
+#define ldul_be(p) ldulr(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be(p) ldslr(p)
+#endif
+#define ldq_be(p) ldqr(p)
+#define ldfl_be(p) ldflr(p)
+#define ldfq_be(p) ldfqr(p)
+#define stb_be(p, v) stbr(p, v)
+#define stw_be(p, v) stwr(p, v)
+#define stl_be(p, v) stlr(p, v)
+#define stq_be(p, v) stqr(p, v)
+#define stfl_be(p, v) stflr(p, v)
+#define stfq_be(p, v) stfqr(p, v)
+/* little-endian */
+#define ldub_le(p) ldub(p)
+#define ldsb_le(p) ldsb(p)
+#define lduw_le(p) lduw(p)
+#define ldsw_le(p) ldsw(p)
+#define ldul_le(p) ldul(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le(p) ldsl(p)
+#endif
+#define ldq_le(p) ldq(p)
+#define ldfl_le(p) ldfl(p)
+#define ldfq_le(p) ldfq(p)
+#define stb_le(p, v) stb(p, v)
+#define stw_le(p, v) stw(p, v)
+#define stl_le(p, v) stl(p, v)
+#define stq_le(p, v) stq(p, v)
+#define stfl_le(p, v) stfl(p, v)
+#define stfq_le(p, v) stfq(p, v)
+#endif
 
+/* native-endian */
 #define ldub_code(p) ldub_raw(p)
 #define ldsb_code(p) ldsb_raw(p)
 #define lduw_code(p) lduw_raw(p)
 #define ldsw_code(p) ldsw_raw(p)
-#define ldl_code(p) ldl_raw(p)
+#define ldul_code(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_code(p) ldsl_raw(p)
+#endif
 #define ldq_code(p) ldq_raw(p)
+/* reverse-endian */
+#define ldubr_code(p) ldub_raw(p)
+#define ldsbr_code(p) ldsb_raw(p)
+#define lduwr_code(p) lduwr_raw(p)
+#define ldswr_code(p) ldswr_raw(p)
+#define ldulr_code(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_code(p) ldslr_raw(p)
+#endif
+#define ldqr_code(p) ldqr_raw(p)
+#if defined(TARGET_WORDS_BIGENDIAN)
+/* big-endian */
+#define ldub_be_code(p) ldub_code(p)
+#define ldsb_be_code(p) ldsb_code(p)
+#define lduw_be_code(p) lduw_code(p)
+#define ldsw_be_code(p) ldsw_code(p)
+#define ldul_be_code(p) ldul_code(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_code(p) ldsl_code(p)
+#endif
+#define ldq_be_code(p) ldq_code(p)
+/* little-endian */
+#define ldub_le_code(p) ldubr_code(p)
+#define ldsb_le_code(p) ldsbr_code(p)
+#define lduw_le_code(p) lduwr_code(p)
+#define ldsw_le_code(p) ldswr_code(p)
+#define ldul_le_code(p) ldulr_code(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_code(p) ldslr_code(p)
+#endif
+#define ldq_le_code(p) ldqr_code(p)
+#else
+/* big-endian */
+#define ldub_be_code(p) ldubr_code(p)
+#define ldsb_be_code(p) ldsbr_code(p)
+#define lduw_be_code(p) lduwr_code(p)
+#define ldsw_be_code(p) ldswr_code(p)
+#define ldul_be_code(p) ldulr_code(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_code(p) ldslr_code(p)
+#endif
+#define ldq_be_code(p) ldqr_code(p)
+/* little-endian */
+#define ldub_le_code(p) ldub_code(p)
+#define ldsb_le_code(p) ldsb_code(p)
+#define lduw_le_code(p) lduw_code(p)
+#define ldsw_le_code(p) ldsw_code(p)
+#define ldul_le_code(p) ldul_code(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_code(p) ldsl_code(p)
+#endif
+#define ldq_le_code(p) ldq_code(p)
+#endif
 
+/* native-endian */
 #define ldub_kernel(p) ldub_raw(p)
 #define ldsb_kernel(p) ldsb_raw(p)
 #define lduw_kernel(p) lduw_raw(p)
 #define ldsw_kernel(p) ldsw_raw(p)
-#define ldl_kernel(p) ldl_raw(p)
+#define ldul_kernel(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_kernel(p) ldsl_raw(p)
+#endif
 #define ldq_kernel(p) ldq_raw(p)
 #define ldfl_kernel(p) ldfl_raw(p)
 #define ldfq_kernel(p) ldfq_raw(p)
@@ -660,6 +1034,99 @@ static inline void stfq_be_p(void *ptr, 
 #define stq_kernel(p, v) stq_raw(p, v)
 #define stfl_kernel(p, v) stfl_raw(p, v)
 #define stfq_kernel(p, vt) stfq_raw(p, v)
+/* reverse-endian */
+#define ldubr_kernel(p) ldub_raw(p)
+#define ldsbr_kernel(p) ldsb_raw(p)
+#define lduwr_kernel(p) lduwr_raw(p)
+#define ldswr_kernel(p) ldswr_raw(p)
+#define ldulr_kernel(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_kernel(p) ldslr_raw(p)
+#endif
+#define ldqr_kernel(p) ldqr_raw(p)
+#define ldflr_kernel(p) ldflr_raw(p)
+#define ldfqr_kernel(p) ldfqr_raw(p)
+#define stbr_kernel(p, v) stbr_raw(p, v)
+#define stwr_kernel(p, v) stwr_raw(p, v)
+#define stlr_kernel(p, v) stlr_raw(p, v)
+#define stqr_kernel(p, v) stqr_raw(p, v)
+#define stflr_kernel(p, v) stflr_raw(p, v)
+#define stfqr_kernel(p, v) stfqr_raw(p, v)
+#if defined(TARGET_WORDS_BIGENDIAN)
+/* big-endian */
+#define ldub_be_kernel(p) ldub_kernel(p)
+#define ldsb_be_kernel(p) ldsb_kernel(p)
+#define lduw_be_kernel(p) lduw_kernel(p)
+#define ldsw_be_kernel(p) ldsw_kernel(p)
+#define ldul_be_kernel(p) ldul_kernel(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_kernel(p) ldsl_kernel(p)
+#endif
+#define ldq_be_kernel(p) ldq_kernel(p)
+#define ldfl_be_kernel(p) ldfl_kernel(p)
+#define ldfq_be_kernel(p) ldfq_kernel(p)
+#define stb_be_kernel(p, v) stb_kernel(p, v)
+#define stw_be_kernel(p, v) stw_kernel(p, v)
+#define stl_be_kernel(p, v) stl_kernel(p, v)
+#define stq_be_kernel(p, v) stq_kernel(p, v)
+#define stfl_be_kernel(p, v) stfl_kernel(p, v)
+#define stfq_be_kernel(p, vt) stfq_kernel(p, vt)
+/* little-endian */
+#define ldub_le_kernel(p) ldubr_kernel(p)
+#define ldsb_le_kernel(p) ldsbr_kernel(p)
+#define lduw_le_kernel(p) lduwr_kernel(p)
+#define ldsw_le_kernel(p) ldswr_kernel(p)
+#define ldul_le_kernel(p) ldulr_kernel(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_kernel(p) ldslr_kernel(p)
+#endif
+#define ldq_le_kernel(p) ldqr_kernel(p)
+#define ldfl_le_kernel(p) ldflr_kernel(p)
+#define ldfq_le_kernel(p) ldfqr_kernel(p)
+#define stb_le_kernel(p, v) stbr_kernel(p, v)
+#define stw_le_kernel(p, v) stwr_kernel(p, v)
+#define stl_le_kernel(p, v) stlr_kernel(p, v)
+#define stq_le_kernel(p, v) stqr_kernel(p, v)
+#define stfl_le_kernel(p, v) stflr_kernel(p, v)
+#define stfq_le_kernel(p, vt) stfqr_kernel(p, vt)
+#else
+/* big-endian */
+#define ldub_be_kernel(p) ldubr_kernel(p)
+#define ldsb_be_kernel(p) ldsbr_kernel(p)
+#define lduw_be_kernel(p) lduwr_kernel(p)
+#define ldsw_be_kernel(p) ldswr_kernel(p)
+#define ldul_be_kernel(p) ldulr_kernel(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_kernel(p) ldslr_kernel(p)
+#endif
+#define ldq_be_kernel(p) ldqr_kernel(p)
+#define ldfl_be_kernel(p) ldflr_kernel(p)
+#define ldfq_be_kernel(p) ldfqr_kernel(p)
+#define stb_be_kernel(p, v) stbr_kernel(p, v)
+#define stw_be_kernel(p, v) stwr_kernel(p, v)
+#define stl_be_kernel(p, v) stlr_kernel(p, v)
+#define stq_be_kernel(p, v) stqr_kernel(p, v)
+#define stfl_be_kernel(p, v) stflr_kernel(p, v)
+#define stfq_be_kernel(p, vt) stfqr_kernel(p, vt)
+/* little-endian */
+#define ldub_le_kernel(p) ldub_kernel(p)
+#define ldsb_le_kernel(p) ldsb_kernel(p)
+#define lduw_le_kernel(p) lduw_kernel(p)
+#define ldsw_le_kernel(p) ldsw_kernel(p)
+#define ldul_le_kernel(p) ldul_kernel(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_kernel(p) ldsl_kernel(p)
+#endif
+#define ldq_le_kernel(p) ldq_kernel(p)
+#define ldfl_le_kernel(p) ldfl_kernel(p)
+#define ldfq_le_kernel(p) ldfq_kernel(p)
+#define stb_le_kernel(p, v) stb_kernel(p, v)
+#define stw_le_kernel(p, v) stw_kernel(p, v)
+#define stl_le_kernel(p, v) stl_kernel(p, v)
+#define stq_le_kernel(p, v) stq_kernel(p, v)
+#define stfl_le_kernel(p, v) stfl_kernel(p, v)
+#define stfq_le_kernel(p, vt) stfq_kernel(p, vt)
+#endif
 
 #endif /* defined(CONFIG_USER_ONLY) */
 
@@ -790,6 +1257,8 @@ extern uint8_t *phys_ram_dirty;
    the physical address */
 #define IO_MEM_ROMD        (1)
 #define IO_MEM_SUBPAGE     (2)
+/* On some target CPUs, endianness is stored in page tables (own flag bit) */
+#define IO_MEM_REVERSE     (4)
 
 typedef void CPUWriteMemoryFunc(void *opaque, target_phys_addr_t addr, uint32_t value);
 typedef uint32_t CPUReadMemoryFunc(void *opaque, target_phys_addr_t addr);
@@ -821,7 +1290,7 @@ static inline void cpu_physical_memory_w
 }
 uint32_t ldub_phys(target_phys_addr_t addr);
 uint32_t lduw_phys(target_phys_addr_t addr);
-uint32_t ldl_phys(target_phys_addr_t addr);
+uint32_t ldul_phys(target_phys_addr_t addr);
 uint64_t ldq_phys(target_phys_addr_t addr);
 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val);
 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val);
Index: cpu-exec.c
===================================================================
RCS file: /sources/qemu/qemu/cpu-exec.c,v
retrieving revision 1.120
diff -u -d -d -p -r1.120 cpu-exec.c
--- cpu-exec.c	14 Oct 2007 07:07:04 -0000	1.120
+++ cpu-exec.c	14 Oct 2007 11:36:51 -0000
@@ -436,12 +436,12 @@ int cpu_exec(CPUState *env1)
                          /* FIXME: this should respect TPR */
                          env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
                          svm_check_intercept(SVM_EXIT_VINTR);
-                         intno = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector));
+                         intno = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector));
                          if (loglevel & CPU_LOG_TB_IN_ASM)
                              fprintf(logfile, "Servicing virtual hardware INT=0x%02x\n", intno);
 	                 do_interrupt(intno, 0, 0, -1, 1);
                          stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl),
-                                  ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)) & ~V_IRQ_MASK);
+                                  ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)) & ~V_IRQ_MASK);
 #if defined(__sparc__) && !defined(HOST_SOLARIS)
                          tmp_T0 = 0;
 #else
Index: exec-all.h
===================================================================
RCS file: /sources/qemu/qemu/exec-all.h,v
retrieving revision 1.68
diff -u -d -d -p -r1.68 exec-all.h
--- exec-all.h	14 Oct 2007 07:07:04 -0000	1.68
+++ exec-all.h	14 Oct 2007 11:36:51 -0000
@@ -569,6 +569,21 @@ void tlb_fill(target_ulong addr, int is_
 #define MEMSUFFIX _code
 #define env cpu_single_env
 
+/* native-endian */
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+
+/* reverse-endian */
+#define REVERSE_ENDIAN
 #define DATA_SIZE 1
 #include "softmmu_header.h"
 
@@ -580,6 +595,7 @@ void tlb_fill(target_ulong addr, int is_
 
 #define DATA_SIZE 8
 #include "softmmu_header.h"
+#undef REVERSE_ENDIAN
 
 #undef ACCESS_TYPE
 #undef MEMSUFFIX
Index: exec.c
===================================================================
RCS file: /sources/qemu/qemu/exec.c,v
retrieving revision 1.109
diff -u -d -d -p -r1.109 exec.c
--- exec.c	14 Oct 2007 07:07:04 -0000	1.109
+++ exec.c	14 Oct 2007 11:36:51 -0000
@@ -2202,7 +2202,7 @@ static uint32_t watch_mem_readw(void *op
 
 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
 {
-    return ldl_phys(addr);
+    return ldul_phys(addr);
 }
 
 /* Generate a debug exception if a watchpoint has been hit.
@@ -2507,7 +2507,7 @@ void cpu_physical_memory_rw(target_phys_
     uint8_t *ptr;
     uint32_t val;
     target_phys_addr_t page;
-    unsigned long pd;
+    unsigned long pd, addr1;
     PhysPageDesc *p;
 
     while (len > 0) {
@@ -2524,31 +2524,54 @@ void cpu_physical_memory_rw(target_phys_
 
         if (is_write) {
             if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
-                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
-                /* XXX: could force cpu_single_env to NULL to avoid
-                   potential bugs */
-                if (l >= 4 && ((addr & 3) == 0)) {
-                    /* 32 bit write access */
-                    val = ldl_p(buf);
-                    io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
-                    l = 4;
-                } else if (l >= 2 && ((addr & 1) == 0)) {
-                    /* 16 bit write access */
-                    val = lduw_p(buf);
-                    io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
-                    l = 2;
+                if (pd & IO_MEM_REVERSE) {
+                    /* Specific case for reverse endian page write */
+                    addr1 = (pd & TARGET_PAGE_MASK) +
+                        (addr & ~TARGET_PAGE_MASK);
+                    ptr = phys_ram_base + addr1;
+                    for (; l >= 4; l -= 4) {
+                        stlr_p(ptr, *(uint32_t *)buf);
+                        ptr += 4;
+                        buf += 4;
+                    }
+                    for (; l >= 2; l -= 2) {
+                        stwr_p(ptr, *(uint16_t *)buf);
+                        ptr += 2;
+                        buf += 2;
+                    }
+                    if (l >= 1)
+                        *ptr = *buf;
+                    goto invalidate_code;
                 } else {
-                    /* 8 bit write access */
-                    val = ldub_p(buf);
-                    io_mem_write[io_index][0](io_mem_opaque[io_index], addr, val);
-                    l = 1;
+                    io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+                    /* XXX: could force cpu_single_env to NULL to avoid
+                       potential bugs */
+                    if (l >= 4 && ((addr & 3) == 0)) {
+                        /* 32 bit write access */
+                        val = ldul_p(buf);
+                        io_mem_write[io_index][2](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 4;
+                    } else if (l >= 2 && ((addr & 1) == 0)) {
+                        /* 16 bit write access */
+                        val = lduw_p(buf);
+                        io_mem_write[io_index][1](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 2;
+                    } else {
+                        /* 8 bit write access */
+                        val = ldub_p(buf);
+                        io_mem_write[io_index][0](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 1;
+                    }
                 }
             } else {
-                unsigned long addr1;
                 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
                 /* RAM case */
                 ptr = phys_ram_base + addr1;
                 memcpy(ptr, buf, l);
+            invalidate_code:
                 if (!cpu_physical_memory_is_dirty(addr1)) {
                     /* invalidate code */
                     tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
@@ -2560,23 +2583,45 @@ void cpu_physical_memory_rw(target_phys_
         } else {
             if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
                 !(pd & IO_MEM_ROMD)) {
-                /* I/O case */
-                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
-                if (l >= 4 && ((addr & 3) == 0)) {
-                    /* 32 bit read access */
-                    val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
-                    stl_p(buf, val);
-                    l = 4;
-                } else if (l >= 2 && ((addr & 1) == 0)) {
-                    /* 16 bit read access */
-                    val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
-                    stw_p(buf, val);
-                    l = 2;
+                if (pd & IO_MEM_REVERSE) {
+                    /* Specific case for reverse endian page read */
+                    addr1 = (pd & TARGET_PAGE_MASK) +
+                        (addr & ~TARGET_PAGE_MASK);
+                    ptr = phys_ram_base + addr1;
+                    for (; l >= 4; l -= 4) {
+                        *(uint32_t *)buf = ldulr_p(ptr);
+                        ptr += 4;
+                        buf += 4;
+                    }
+                    for (; l >= 2; l -= 2) {
+                        *(uint16_t *)buf = lduwr_p(ptr);
+                        ptr += 2;
+                        buf += 2;
+                    }
+                    if (l >= 1)
+                        *buf = *ptr;
                 } else {
-                    /* 8 bit read access */
-                    val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr);
-                    stb_p(buf, val);
-                    l = 1;
+                    /* I/O case */
+                    io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+                    if (l >= 4 && ((addr & 3) == 0)) {
+                        /* 32 bit read access */
+                        val = io_mem_read[io_index][2](io_mem_opaque[io_index],
+                                                       addr);
+                        stl_p(buf, val);
+                        l = 4;
+                    } else if (l >= 2 && ((addr & 1) == 0)) {
+                        /* 16 bit read access */
+                        val = io_mem_read[io_index][1](io_mem_opaque[io_index],
+                                                       addr);
+                        stw_p(buf, val);
+                        l = 2;
+                    } else {
+                        /* 8 bit read access */
+                        val = io_mem_read[io_index][0](io_mem_opaque[io_index],
+                                                       addr);
+                        stb_p(buf, val);
+                        l = 1;
+                    }
                 }
             } else {
                 /* RAM case */
@@ -2632,7 +2677,7 @@ void cpu_physical_memory_write_rom(targe
 
 
 /* warning: addr must be aligned */
-uint32_t ldl_phys(target_phys_addr_t addr)
+uint32_t ldul_phys(target_phys_addr_t addr)
 {
     int io_index;
     uint8_t *ptr;
@@ -2656,7 +2701,7 @@ uint32_t ldl_phys(target_phys_addr_t add
         /* RAM case */
         ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) +
             (addr & ~TARGET_PAGE_MASK);
-        val = ldl_p(ptr);
+        val = ldul_p(ptr);
     }
     return val;
 }
@@ -2907,6 +2952,7 @@ void dump_exec_info(FILE *f,
 #define env cpu_single_env
 #define SOFTMMU_CODE_ACCESS
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -2919,6 +2965,21 @@ void dump_exec_info(FILE *f,
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 #undef env
 
 #endif
Index: monitor.c
===================================================================
RCS file: /sources/qemu/qemu/monitor.c,v
retrieving revision 1.83
diff -u -d -d -p -r1.83 monitor.c
--- monitor.c	25 Sep 2007 17:28:42 -0000	1.83
+++ monitor.c	14 Oct 2007 11:36:51 -0000
@@ -595,7 +595,7 @@ static void memory_dump(int count, int f
                 v = lduw_raw(buf + i);
                 break;
             case 4:
-                v = (uint32_t)ldl_raw(buf + i);
+                v = (uint32_t)ldul_raw(buf + i);
                 break;
             case 8:
                 v = ldq_raw(buf + i);
Index: softmmu_exec.h
===================================================================
RCS file: /sources/qemu/qemu/softmmu_exec.h,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 softmmu_exec.h
--- softmmu_exec.h	14 Oct 2007 07:07:05 -0000	1.2
+++ softmmu_exec.h	14 Oct 2007 11:36:51 -0000
@@ -1,13 +1,7 @@
 /* Common softmmu definitions and inline routines.  */
 
-/* XXX: find something cleaner.
- * Furthermore, this is false for 64 bits targets
- */
-#define ldul_user       ldl_user
-#define ldul_kernel     ldl_kernel
-#define ldul_hypv       ldl_hypv
-#define ldul_executive  ldl_executive
-#define ldul_supervisor ldl_supervisor
+#define lduq_user ldq_user
+#define lduq_kernel ldq_kernel
 
 #define ACCESS_TYPE 0
 #define MEMSUFFIX MMU_MODE0_SUFFIX
@@ -104,7 +98,10 @@
 #define ldsb(p) ldsb_data(p)
 #define lduw(p) lduw_data(p)
 #define ldsw(p) ldsw_data(p)
-#define ldl(p) ldl_data(p)
+#define ldul(p) ldul_data(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl(p) ldsl_data(p)
+#endif
 #define ldq(p) ldq_data(p)
 
 #define stb(p, v) stb_data(p, v)
Index: softmmu_header.h
===================================================================
RCS file: /sources/qemu/qemu/softmmu_header.h,v
retrieving revision 1.18
diff -u -d -d -p -r1.18 softmmu_header.h
--- softmmu_header.h	14 Oct 2007 07:07:05 -0000	1.18
+++ softmmu_header.h	14 Oct 2007 11:36:51 -0000
@@ -17,27 +17,86 @@
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
+
+#if !defined(REVERSE_ENDIAN)
+/* native-endian */
+#if defined(TARGET_WORDS_BIGENDIAN)
+#define ESUFFIX _be
+#else
+#define ESUFFIX _le
+#endif
 #if DATA_SIZE == 8
 #define SUFFIX q
 #define USUFFIX q
+#define LSUFFIX q
+#define LUSUFFIX q
 #define DATA_TYPE uint64_t
 #elif DATA_SIZE == 4
 #define SUFFIX l
-#define USUFFIX l
+#define USUFFIX ul
+#define LSUFFIX l
+#define LUSUFFIX ul
+#if (TARGET_LONG_BITS == 64)
+#define DATA_STYPE int32_t
+#endif
 #define DATA_TYPE uint32_t
 #elif DATA_SIZE == 2
 #define SUFFIX w
 #define USUFFIX uw
+#define LSUFFIX w
+#define LUSUFFIX uw
 #define DATA_TYPE uint16_t
 #define DATA_STYPE int16_t
 #elif DATA_SIZE == 1
 #define SUFFIX b
 #define USUFFIX ub
+#define LSUFFIX b
+#define LUSUFFIX ub
+#define DATA_TYPE uint8_t
+#define DATA_STYPE int8_t
+#else
+#error unsupported data size
+#endif
+#else /* !defined(REVERSE_ENDIAN) */
+/* reverse-endian */
+#if defined(TARGET_WORDS_BIGENDIAN)
+#define ESUFFIX _le
+#else
+#define ESUFFIX _be
+#endif
+#if DATA_SIZE == 8
+#define SUFFIX qr
+#define USUFFIX qr
+#define LSUFFIX q
+#define LUSUFFIX q
+#define DATA_TYPE uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX lr
+#define USUFFIX ulr
+#define LSUFFIX l
+#define LUSUFFIX ul
+#if (TARGET_LONG_BITS == 64)
+#define DATA_STYPE int32_t
+#endif
+#define DATA_TYPE uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX wr
+#define USUFFIX uwr
+#define LSUFFIX w
+#define LUSUFFIX uw
+#define DATA_TYPE uint16_t
+#define DATA_STYPE int16_t
+#elif DATA_SIZE == 1
+#define SUFFIX br
+#define USUFFIX ubr
+#define LSUFFIX b
+#define LUSUFFIX ub
 #define DATA_TYPE uint8_t
 #define DATA_STYPE int8_t
 #else
 #error unsupported data size
 #endif
+#endif /* defined(REVERSE_ENDIAN) */
 
 #if ACCESS_TYPE < (NB_MMU_MODES)
 
@@ -121,7 +180,7 @@ static inline RES_TYPE glue(glue(ld, USU
     return res;
 }
 
-#if DATA_SIZE <= 2
+#if defined(DATA_STYPE)
 static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
 {
     int res;
@@ -244,7 +303,7 @@ static inline RES_TYPE glue(glue(ld, USU
     return res;
 }
 
-#if DATA_SIZE <= 2
+#if defined(DATA_STYPE)
 static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
 {
     int res, index;
@@ -293,8 +352,29 @@ static inline void glue(glue(st, SUFFIX)
 
 #endif /* !asm */
 
+/* BE/LE access routines */
+static inline RES_TYPE glue(glue(glue(ld, LUSUFFIX), ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(glue(ld, USUFFIX), MEMSUFFIX)(ptr);
+}
+
+#if defined(DATA_STYPE)
+static inline RES_TYPE glue(glue(glue(lds, LSUFFIX), ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(glue(lds, SUFFIX), MEMSUFFIX)(ptr);
+}
+#endif
+
+#if ACCESS_TYPE != (NB_MMU_MODES + 1) /* same guard as the float accessors */
+static inline void glue(glue(glue(st, LSUFFIX), ESUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v)
+{
+    glue(glue(st, SUFFIX), MEMSUFFIX)(ptr, v);
+}
+#endif
+
 #if ACCESS_TYPE != (NB_MMU_MODES + 1)
 
+#if !defined(REVERSE_ENDIAN)
 #if DATA_SIZE == 8
 static inline float64 glue(ldfq, MEMSUFFIX)(target_ulong ptr)
 {
@@ -306,6 +386,11 @@ static inline float64 glue(ldfq, MEMSUFF
     return u.d;
 }
 
+static inline float64 glue(glue(ldfq, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldfq, MEMSUFFIX)(ptr);
+}
+
 static inline void glue(stfq, MEMSUFFIX)(target_ulong ptr, float64 v)
 {
     union {
@@ -315,6 +400,12 @@ static inline void glue(stfq, MEMSUFFIX)
     u.d = v;
     glue(stq, MEMSUFFIX)(ptr, u.i);
 }
+
+static inline void glue(glue(stfq, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                        float64 v)
+{
+    glue(stfq, MEMSUFFIX)(ptr, v);
+}
 #endif /* DATA_SIZE == 8 */
 
 #if DATA_SIZE == 4
@@ -324,10 +415,15 @@ static inline float32 glue(ldfl, MEMSUFF
         float32 f;
         uint32_t i;
     } u;
-    u.i = glue(ldl, MEMSUFFIX)(ptr);
+    u.i = glue(ldul, MEMSUFFIX)(ptr);
     return u.f;
 }
 
+static inline float32 glue(glue(ldfl, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldfl, MEMSUFFIX)(ptr);
+}
+
 static inline void glue(stfl, MEMSUFFIX)(target_ulong ptr, float32 v)
 {
     union {
@@ -337,8 +433,84 @@ static inline void glue(stfl, MEMSUFFIX)
     u.f = v;
     glue(stl, MEMSUFFIX)(ptr, u.i);
 }
+
+static inline void glue(glue(stfl, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                        float32 v)
+{
+    glue(stfl, MEMSUFFIX)(ptr, v);
+}
 #endif /* DATA_SIZE == 4 */
 
+#else /* defined(REVERSE_ENDIAN) */
+
+#if DATA_SIZE == 8
+static inline float64 glue(ldfqr, MEMSUFFIX)(target_ulong ptr)
+{
+    union {
+        float64 d;
+        uint64_t i;
+    } u;
+    u.i = glue(ldqr, MEMSUFFIX)(ptr);
+    return u.d;
+}
+
+static inline float64 glue(glue(ldfqr, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldfqr, MEMSUFFIX)(ptr);
+}
+
+static inline void glue(stfqr, MEMSUFFIX)(target_ulong ptr, float64 v)
+{
+    union {
+        float64 d;
+        uint64_t i;
+    } u;
+    u.d = v;
+    glue(stqr, MEMSUFFIX)(ptr, u.i);
+}
+
+static inline void glue(glue(stfqr, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                         float64 v)
+{
+    glue(stfqr, MEMSUFFIX)(ptr, v);
+}
+#endif /* DATA_SIZE == 8 */
+
+#if DATA_SIZE == 4
+static inline float32 glue(ldflr, MEMSUFFIX)(target_ulong ptr)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.i = glue(ldulr, MEMSUFFIX)(ptr);
+    return u.f;
+}
+
+static inline float32 glue(glue(ldflr, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldflr, MEMSUFFIX)(ptr);
+}
+
+static inline void glue(stflr, MEMSUFFIX)(target_ulong ptr, float32 v)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.f = v;
+    glue(stlr, MEMSUFFIX)(ptr, u.i);
+}
+
+static inline void glue(glue(stflr, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                         float32 v)
+{
+    glue(stflr, MEMSUFFIX)(ptr, v);
+}
+#endif /* DATA_SIZE == 4 */
+
+#endif /* defined(REVERSE_ENDIAN) */
+
 #endif /* ACCESS_TYPE != (NB_MMU_MODES + 1) */
 
 #undef RES_TYPE
@@ -346,7 +518,10 @@ static inline void glue(stfl, MEMSUFFIX)
 #undef DATA_STYPE
 #undef SUFFIX
 #undef USUFFIX
+#undef LSUFFIX
+#undef LUSUFFIX
 #undef DATA_SIZE
 #undef CPU_MMU_INDEX
 #undef MMUSUFFIX
+#undef ESUFFIX
 #undef ADDR_READ
Index: softmmu_template.h
===================================================================
RCS file: /sources/qemu/qemu/softmmu_template.h,v
retrieving revision 1.19
diff -u -d -d -p -r1.19 softmmu_template.h
--- softmmu_template.h	14 Oct 2007 07:07:05 -0000	1.19
+++ softmmu_template.h	14 Oct 2007 11:36:51 -0000
@@ -19,25 +19,66 @@
  */
 #define DATA_SIZE (1 << SHIFT)
 
+#if !defined(REVERSE_ENDIAN)
+/* native-endian */
 #if DATA_SIZE == 8
 #define SUFFIX q
 #define USUFFIX q
+#define RSUFFIX qr
+#define URSUFFIX qr
 #define DATA_TYPE uint64_t
 #elif DATA_SIZE == 4
 #define SUFFIX l
-#define USUFFIX l
+#define USUFFIX ul
+#define RSUFFIX lr
+#define URSUFFIX ulr
 #define DATA_TYPE uint32_t
 #elif DATA_SIZE == 2
 #define SUFFIX w
 #define USUFFIX uw
+#define RSUFFIX wr
+#define URSUFFIX uwr
 #define DATA_TYPE uint16_t
 #elif DATA_SIZE == 1
 #define SUFFIX b
 #define USUFFIX ub
+#define RSUFFIX br
+#define URSUFFIX ubr
 #define DATA_TYPE uint8_t
 #else
 #error unsupported data size
 #endif
+#else /* !defined(REVERSE_ENDIAN) */
+/* reverse-endian */
+#if DATA_SIZE == 8
+#define SUFFIX qr
+#define USUFFIX qr
+#define RSUFFIX q
+#define URSUFFIX q
+#define DATA_TYPE uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX lr
+#define USUFFIX ulr
+#define RSUFFIX l
+#define URSUFFIX ul
+#define DATA_TYPE uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX wr
+#define USUFFIX uwr
+#define RSUFFIX w
+#define URSUFFIX uw
+#define DATA_TYPE uint16_t
+#elif DATA_SIZE == 1
+#define SUFFIX br
+#define USUFFIX ubr
+#define RSUFFIX b
+#define URSUFFIX ub
+#define DATA_TYPE uint8_t
+#else
+#error unsupported data size
+#endif
+#endif /* defined(REVERSE_ENDIAN) */
+
 
 #ifdef SOFTMMU_CODE_ACCESS
 #define READ_ACCESS_TYPE 2
@@ -47,25 +88,62 @@
 #define ADDR_READ addr_read
 #endif
 
+#if (defined(TARGET_WORDS_BIGENDIAN) && !defined(REVERSE_ENDIAN)) || \
+    (!defined(TARGET_WORDS_BIGENDIAN) && defined(REVERSE_ENDIAN))
+#define ACCESS_WORDS_BIGENDIAN
+#endif
+
+/* Beware: we do not have reverse-endian accessors for IOs */
+#if defined(REVERSE_ENDIAN)
+#define DO_IOSWAP 1
+#else
+#define DO_IOSWAP 0
+#endif
+#if SHIFT == 1
+#define IOSWAP(val) bswap16(val)
+#elif SHIFT >= 2
+#define IOSWAP(val) bswap32(val)
+#else
+#define IOSWAP(val) (val)
+#endif
+
 static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
                                                         int mmu_idx,
                                                         void *retaddr);
 static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
-                                              target_ulong tlb_addr)
+                                              target_ulong tlb_addr,
+                                              int do_ioswap)
 {
     DATA_TYPE res;
+#if SHIFT > 2
+    uint32_t tmp;
+#endif
     int index;
 
     index = (tlb_addr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
 #if SHIFT <= 2
     res = io_mem_read[index][SHIFT](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        res = IOSWAP(res);
 #else
-#ifdef TARGET_WORDS_BIGENDIAN
-    res = (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr) << 32;
-    res |= io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+#ifdef ACCESS_WORDS_BIGENDIAN
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res = (uint64_t)tmp << 32;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res |= tmp;
 #else
-    res = io_mem_read[index][2](io_mem_opaque[index], physaddr);
-    res |= (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr + 4) << 32;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res = tmp;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res |= (uint64_t)tmp << 32;
 #endif
 #endif /* SHIFT > 2 */
 #ifdef USE_KQEMU
@@ -92,10 +170,34 @@ DATA_TYPE REGPARM(1) glue(glue(__ld, SUF
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[mmu_idx][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO read */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 1);
+                } else {
+                    /* Specific case for reverse endian page read */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages or IO) */
+                        goto do_unaligned_access;
+                    }
+#ifdef ALIGNED_ONLY
+                    if ((addr & (DATA_SIZE - 1)) != 0) {
+                        retaddr = GETPC();
+                        do_unaligned_access(addr, READ_ACCESS_TYPE,
+                                            mmu_idx, retaddr);
+                    }
+#endif
+                    res = glue(glue(ld, URSUFFIX), _raw)((uint8_t *)(long)physaddr);
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
             /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
@@ -144,10 +246,45 @@ static DATA_TYPE glue(glue(slow_ld, SUFF
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[mmu_idx][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO read */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 1);
+                } else {
+                    /* Specific case for reverse endian page read */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages) */
+                        addr1 = addr & ~(DATA_SIZE - 1);
+                        addr2 = addr1 + DATA_SIZE;
+                        res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr1,
+                                                                      mmu_idx,
+                                                                      retaddr);
+                        res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
+                                                                      mmu_idx,
+                                                                      retaddr);
+                        shift = (addr & (DATA_SIZE - 1)) * 8;
+#ifdef ACCESS_WORDS_BIGENDIAN
+                        res = (res1 >> shift) |
+                            (res2 << ((DATA_SIZE * 8) - shift));
+#else
+                        res = (res1 << shift) |
+                            (res2 >> ((DATA_SIZE * 8) - shift));
+#endif
+                        res = (DATA_TYPE)res;
+                    } else {
+                        /* unaligned/aligned access in the same page */
+                        res = glue(glue(ld, URSUFFIX), _raw)((uint8_t *)(long)physaddr);
+                    }
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* slow unaligned access (it spans two pages) */
@@ -158,7 +295,7 @@ static DATA_TYPE glue(glue(slow_ld, SUFF
             res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
                                                           mmu_idx, retaddr);
             shift = (addr & (DATA_SIZE - 1)) * 8;
-#ifdef TARGET_WORDS_BIGENDIAN
+#ifdef ACCESS_WORDS_BIGENDIAN
             res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
 #else
             res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift));
@@ -186,22 +323,39 @@ static void glue(glue(slow_st, SUFFIX), 
 static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr,
                                           DATA_TYPE val,
                                           target_ulong tlb_addr,
-                                          void *retaddr)
+                                          void *retaddr, int do_ioswap)
 {
+#if SHIFT > 2
+    uint32_t tmp;
+#endif
     int index;
 
     index = (tlb_addr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
     env->mem_write_vaddr = tlb_addr;
     env->mem_write_pc = (unsigned long)retaddr;
 #if SHIFT <= 2
+    if (do_ioswap != DO_IOSWAP)
+        val = IOSWAP(val);
     io_mem_write[index][SHIFT](io_mem_opaque[index], physaddr, val);
 #else
-#ifdef TARGET_WORDS_BIGENDIAN
-    io_mem_write[index][2](io_mem_opaque[index], physaddr, val >> 32);
-    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val);
+#ifdef ACCESS_WORDS_BIGENDIAN
+    tmp = val >> 32;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr, tmp);
+    tmp = val;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, tmp);
 #else
-    io_mem_write[index][2](io_mem_opaque[index], physaddr, val);
-    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val >> 32);
+    tmp = val;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr, tmp);
+    tmp = val >> 32;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, tmp);
 #endif
 #endif /* SHIFT > 2 */
 #ifdef USE_KQEMU
@@ -224,12 +378,37 @@ void REGPARM(2) glue(glue(__st, SUFFIX),
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[mmu_idx][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            retaddr = GETPC();
-            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO write */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    retaddr = GETPC();
+                    glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr,
+                                           1);
+                } else {
+                    /* Specific case for reverse endian page write */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        goto do_unaligned_access;
+                    }
+#ifdef ALIGNED_ONLY
+                    if ((addr & (DATA_SIZE - 1)) != 0) {
+                        retaddr = GETPC();
+                        do_unaligned_access(addr, 1, mmu_idx, retaddr);
+                    }
+#endif
+                    glue(glue(st, RSUFFIX), _raw)((uint8_t *)(long)physaddr, val);
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                retaddr = GETPC();
+                glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
+            /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
@@ -275,15 +454,48 @@ static void glue(glue(slow_st, SUFFIX), 
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[mmu_idx][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO write */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr,
+                                           1);
+                } else {
+                    /* Specific case for reverse endian page write */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages or IO) */
+                        /* XXX: not efficient, but simple */
+                        for(i = 0;i < DATA_SIZE; i++) {
+#ifdef ACCESS_WORDS_BIGENDIAN
+                            glue(slow_stb, MMUSUFFIX)(addr + i, val >> (i * 8),
+                                                      mmu_idx, retaddr);
+#else
+                            glue(slow_stb, MMUSUFFIX)(addr + i,
+                                                      val >> (((DATA_SIZE - 1) * 8)
+                                                              - (i * 8)),
+                                                      mmu_idx, retaddr);
+#endif
+                        }
+
+                    } else {
+                        /* aligned/unaligned access in the same page */
+                        glue(glue(st, RSUFFIX), _raw)((uint8_t *)(long)physaddr,
+                                                      val);
+                    }
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* XXX: not efficient, but simple */
             for(i = 0;i < DATA_SIZE; i++) {
-#ifdef TARGET_WORDS_BIGENDIAN
+#ifdef ACCESS_WORDS_BIGENDIAN
                 glue(slow_stb, MMUSUFFIX)(addr + i, val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
                                           mmu_idx, retaddr);
 #else
@@ -304,10 +516,15 @@ static void glue(glue(slow_st, SUFFIX), 
 
 #endif /* !defined(SOFTMMU_CODE_ACCESS) */
 
+#undef DO_IOSWAP
+#undef IOSWAP
+#undef ACCESS_WORDS_BIGENDIAN
 #undef READ_ACCESS_TYPE
 #undef SHIFT
 #undef DATA_TYPE
 #undef SUFFIX
 #undef USUFFIX
+#undef RSUFFIX
+#undef URSUFFIX
 #undef DATA_SIZE
 #undef ADDR_READ
Index: hw/eepro100.c
===================================================================
RCS file: /sources/qemu/qemu/hw/eepro100.c,v
retrieving revision 1.6
diff -u -d -d -p -r1.6 eepro100.c
--- hw/eepro100.c	16 Sep 2007 21:07:52 -0000	1.6
+++ hw/eepro100.c	14 Oct 2007 11:36:52 -0000
@@ -723,7 +723,7 @@ static void eepro100_cu_command(EEPRO100
             uint32_t tbd_address = cb_address + 0x10;
             assert(tcb_bytes <= sizeof(buf));
             while (size < tcb_bytes) {
-                uint32_t tx_buffer_address = ldl_phys(tbd_address);
+                uint32_t tx_buffer_address = ldul_phys(tbd_address);
                 uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
                 //~ uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
                 tbd_address += 8;
@@ -743,7 +743,7 @@ static void eepro100_cu_command(EEPRO100
                     /* Extended TCB. */
                     assert(tcb_bytes == 0);
                     for (; tbd_count < 2; tbd_count++) {
-                        uint32_t tx_buffer_address = ldl_phys(tbd_address);
+                        uint32_t tx_buffer_address = ldul_phys(tbd_address);
                         uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
                         uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
                         tbd_address += 8;
@@ -760,7 +760,7 @@ static void eepro100_cu_command(EEPRO100
                 }
                 tbd_address = tbd_array;
                 for (; tbd_count < tx.tbd_count; tbd_count++) {
-                    uint32_t tx_buffer_address = ldl_phys(tbd_address);
+                    uint32_t tx_buffer_address = ldul_phys(tbd_address);
                     uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
                     uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
                     tbd_address += 8;
Index: hw/pc.c
===================================================================
RCS file: /sources/qemu/qemu/hw/pc.c,v
retrieving revision 1.87
diff -u -d -d -p -r1.87 pc.c
--- hw/pc.c	9 Oct 2007 03:08:56 -0000	1.87
+++ hw/pc.c	14 Oct 2007 11:36:52 -0000
@@ -477,8 +477,8 @@ static void load_linux(const char *kerne
     }
 
     /* kernel protocol version */
-    fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202));
-    if (ldl_p(header+0x202) == 0x53726448)
+    fprintf(stderr, "header magic: %#x\n", ldul_p(header+0x202));
+    if (ldul_p(header+0x202) == 0x53726448)
 	protocol = lduw_p(header+0x206);
     else
 	protocol = 0;
@@ -510,7 +510,7 @@ static void load_linux(const char *kerne
 
     /* highest address for loading the initrd */
     if (protocol >= 0x203)
-	initrd_max = ldl_p(header+0x22c);
+	initrd_max = ldul_p(header+0x22c);
     else
 	initrd_max = 0x37ffffff;
 
Index: hw/pl080.c
===================================================================
RCS file: /sources/qemu/qemu/hw/pl080.c,v
retrieving revision 1.5
diff -u -d -d -p -r1.5 pl080.c
--- hw/pl080.c	16 Sep 2007 21:07:55 -0000	1.5
+++ hw/pl080.c	14 Oct 2007 11:36:52 -0000
@@ -162,10 +162,10 @@ again:
             if (size == 0) {
                 /* Transfer complete.  */
                 if (ch->lli) {
-                    ch->src = ldl_phys(ch->lli);
-                    ch->dest = ldl_phys(ch->lli + 4);
-                    ch->ctrl = ldl_phys(ch->lli + 12);
-                    ch->lli = ldl_phys(ch->lli + 8);
+                    ch->src = ldul_phys(ch->lli);
+                    ch->dest = ldul_phys(ch->lli + 4);
+                    ch->ctrl = ldul_phys(ch->lli + 12);
+                    ch->lli = ldul_phys(ch->lli + 8);
                 } else {
                     ch->conf &= ~PL080_CCONF_E;
                 }
Index: hw/sun4m.c
===================================================================
RCS file: /sources/qemu/qemu/hw/sun4m.c,v
retrieving revision 1.55
diff -u -d -d -p -r1.55 sun4m.c
--- hw/sun4m.c	6 Oct 2007 11:28:21 -0000	1.55
+++ hw/sun4m.c	14 Oct 2007 11:36:52 -0000
@@ -465,7 +465,7 @@ static void sun4m_load_kernel(long vram_
         }
         if (initrd_size > 0) {
             for (i = 0; i < 64 * TARGET_PAGE_SIZE; i += TARGET_PAGE_SIZE) {
-                if (ldl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
+                if (ldul_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
                     == 0x48647253) { // HdrS
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 16, INITRD_LOAD_ADDR);
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 20, initrd_size);
Index: hw/sun4u.c
===================================================================
RCS file: /sources/qemu/qemu/hw/sun4u.c,v
retrieving revision 1.22
diff -u -d -d -p -r1.22 sun4u.c
--- hw/sun4u.c	6 Oct 2007 11:28:21 -0000	1.22
+++ hw/sun4u.c	14 Oct 2007 11:36:52 -0000
@@ -418,7 +418,7 @@ static void sun4u_init(int ram_size, int
         }
         if (initrd_size > 0) {
             for (i = 0; i < 64 * TARGET_PAGE_SIZE; i += TARGET_PAGE_SIZE) {
-                if (ldl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
+                if (ldul_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
                     == 0x48647253) { // HdrS
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 16, INITRD_LOAD_ADDR);
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 20, initrd_size);
Index: linux-user/elfload.c
===================================================================
RCS file: /sources/qemu/qemu/linux-user/elfload.c,v
retrieving revision 1.51
diff -u -d -d -p -r1.51 elfload.c
--- linux-user/elfload.c	9 Oct 2007 16:34:29 -0000	1.51
+++ linux-user/elfload.c	14 Oct 2007 11:36:52 -0000
@@ -336,7 +336,7 @@ static inline void init_thread(struct ta
     pos += sizeof(target_ulong);
     _regs->gpr[4] = pos;
     for (tmp = 1; tmp != 0; pos += sizeof(target_ulong))
-        tmp = ldl(pos);
+        tmp = ldul(pos);
     _regs->gpr[5] = pos;
 }
 
Index: linux-user/qemu.h
===================================================================
RCS file: /sources/qemu/qemu/linux-user/qemu.h,v
retrieving revision 1.40
diff -u -d -d -p -r1.40 qemu.h
--- linux-user/qemu.h	9 Oct 2007 16:34:29 -0000	1.40
+++ linux-user/qemu.h	14 Oct 2007 11:36:52 -0000
@@ -313,7 +313,7 @@ static inline void *lock_user_string(tar
 #define tput8(addr, val) stb(addr, val)
 #define tget16(addr) lduw(addr)
 #define tput16(addr, val) stw(addr, val)
-#define tget32(addr) ldl(addr)
+#define tget32(addr) ldul(addr)
 #define tput32(addr, val) stl(addr, val)
 #define tget64(addr) ldq(addr)
 #define tput64(addr, val) stq(addr, val)
@@ -321,7 +321,7 @@ static inline void *lock_user_string(tar
 #define tgetl(addr) ldq(addr)
 #define tputl(addr, val) stq(addr, val)
 #else
-#define tgetl(addr) ldl(addr)
+#define tgetl(addr) ldul(addr)
 #define tputl(addr, val) stl(addr, val)
 #endif
 
Index: linux-user/signal.c
===================================================================
RCS file: /sources/qemu/qemu/linux-user/signal.c,v
retrieving revision 1.45
diff -u -d -d -p -r1.45 signal.c
--- linux-user/signal.c	5 Oct 2007 17:01:51 -0000	1.45
+++ linux-user/signal.c	14 Oct 2007 11:36:53 -0000
@@ -878,28 +878,28 @@ restore_sigcontext(CPUX86State *env, str
         cpu_x86_load_seg(env, R_ES, lduw(&sc->es));
         cpu_x86_load_seg(env, R_DS, lduw(&sc->ds));
 
-        env->regs[R_EDI] = ldl(&sc->edi);
-        env->regs[R_ESI] = ldl(&sc->esi);
-        env->regs[R_EBP] = ldl(&sc->ebp);
-        env->regs[R_ESP] = ldl(&sc->esp);
-        env->regs[R_EBX] = ldl(&sc->ebx);
-        env->regs[R_EDX] = ldl(&sc->edx);
-        env->regs[R_ECX] = ldl(&sc->ecx);
-        env->eip = ldl(&sc->eip);
+        env->regs[R_EDI] = ldul(&sc->edi);
+        env->regs[R_ESI] = ldul(&sc->esi);
+        env->regs[R_EBP] = ldul(&sc->ebp);
+        env->regs[R_ESP] = ldul(&sc->esp);
+        env->regs[R_EBX] = ldul(&sc->ebx);
+        env->regs[R_EDX] = ldul(&sc->edx);
+        env->regs[R_ECX] = ldul(&sc->ecx);
+        env->eip = ldul(&sc->eip);
 
         cpu_x86_load_seg(env, R_CS, lduw(&sc->cs) | 3);
         cpu_x86_load_seg(env, R_SS, lduw(&sc->ss) | 3);
 
 	{
 		unsigned int tmpflags;
-                tmpflags = ldl(&sc->eflags);
+                tmpflags = ldul(&sc->eflags);
 		env->eflags = (env->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
                 //		regs->orig_eax = -1;		/* disable syscall checks */
 	}
 
 	{
 		struct _fpstate * buf;
-                buf = (void *)ldl(&sc->fpstate);
+                buf = (void *)ldul(&sc->fpstate);
 		if (buf) {
 #if 0
 			if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
@@ -909,7 +909,7 @@ restore_sigcontext(CPUX86State *env, str
 		}
 	}
 
-        *peax = ldl(&sc->eax);
+        *peax = ldul(&sc->eax);
 	return err;
 #if 0
 badframe:
Index: linux-user/vm86.c
===================================================================
RCS file: /sources/qemu/qemu/linux-user/vm86.c,v
retrieving revision 1.11
diff -u -d -d -p -r1.11 vm86.c
--- linux-user/vm86.c	17 Sep 2007 08:09:50 -0000	1.11
+++ linux-user/vm86.c	14 Oct 2007 11:36:53 -0000
@@ -56,7 +56,7 @@ static inline unsigned int vm_getw(uint8
 
 static inline unsigned int vm_getl(uint8_t *segptr, unsigned int reg16)
 {
-    return ldl(segptr + (reg16 & 0xffff));
+    return ldul(segptr + (reg16 & 0xffff));
 }
 
 void save_v86_state(CPUX86State *env)
Index: target-alpha/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/exec.h,v
retrieving revision 1.4
diff -u -d -d -p -r1.4 exec.h
--- target-alpha/exec.h	14 Oct 2007 07:07:05 -0000	1.4
+++ target-alpha/exec.h	14 Oct 2007 11:36:53 -0000
@@ -62,6 +62,9 @@ register uint64_t T2 asm(AREG3);
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 static inline void env_to_regs(void)
Index: target-alpha/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/helper.c,v
retrieving revision 1.5
diff -u -d -d -p -r1.5 helper.c
--- target-alpha/helper.c	14 Oct 2007 07:07:05 -0000	1.5
+++ target-alpha/helper.c	14 Oct 2007 11:36:53 -0000
@@ -69,7 +69,7 @@ int cpu_alpha_handle_mmu_fault (CPUState
             env->exception_index = EXCP_DTB_MISS_PAL;
         else
             env->exception_index = EXCP_DTB_MISS_NATIVE;
-        opc = (ldl_code(env->pc) >> 21) << 4;
+        opc = (ldul_code(env->pc) >> 21) << 4;
         if (rw) {
             opc |= 0x9;
         } else {
Index: target-alpha/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/op_helper.c,v
retrieving revision 1.4
diff -u -d -d -p -r1.4 op_helper.c
--- target-alpha/op_helper.c	14 Oct 2007 08:18:12 -0000	1.4
+++ target-alpha/op_helper.c	14 Oct 2007 11:36:53 -0000
@@ -1213,6 +1213,21 @@ void helper_st_phys_to_virt (void)
 
 #define MMUSUFFIX _mmu
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -1224,6 +1239,7 @@ void helper_st_phys_to_virt (void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-alpha/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/op_mem.h,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 op_mem.h
--- target-alpha/op_mem.h	16 Sep 2007 21:08:01 -0000	1.2
+++ target-alpha/op_mem.h	14 Oct 2007 11:36:53 -0000
@@ -30,7 +30,7 @@ static inline uint32_t glue(ldl_l, MEMSU
 {
     env->lock = EA;
 
-    return glue(ldl, MEMSUFFIX)(EA);
+    return glue(ldul, MEMSUFFIX)(EA);
 }
 
 static inline uint32_t glue(ldq_l, MEMSUFFIX) (target_ulong EA)
@@ -82,7 +82,7 @@ ALPHA_LD_OP(bu, ldub);
 ALPHA_ST_OP(b, stb);
 ALPHA_LD_OP(wu, lduw);
 ALPHA_ST_OP(w, stw);
-ALPHA_LD_OP(l, ldl);
+ALPHA_LD_OP(l, ldul);
 ALPHA_ST_OP(l, stl);
 ALPHA_LD_OP(q, ldq);
 ALPHA_ST_OP(q, stq);
Index: target-alpha/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-alpha/translate.c,v
retrieving revision 1.6
diff -u -d -d -p -r1.6 translate.c
--- target-alpha/translate.c	14 Oct 2007 08:50:17 -0000	1.6
+++ target-alpha/translate.c	14 Oct 2007 11:36:53 -0000
@@ -2010,7 +2010,7 @@ int gen_intermediate_code_internal (CPUS
                     ctx.pc, ctx.mem_idx);
         }
 #endif
-        insn = ldl_code(ctx.pc);
+        insn = ldul_code(ctx.pc);
 #if defined ALPHA_DEBUG_DISAS
         insn_count++;
         if (logfile != NULL) {
Index: target-arm/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-arm/exec.h,v
retrieving revision 1.14
diff -u -d -d -p -r1.14 exec.h
--- target-arm/exec.h	14 Oct 2007 07:07:05 -0000	1.14
+++ target-arm/exec.h	14 Oct 2007 11:36:53 -0000
@@ -64,6 +64,9 @@ static inline int cpu_halted(CPUState *e
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 /* In op_helper.c */
Index: target-arm/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-arm/helper.c,v
retrieving revision 1.23
diff -u -d -d -p -r1.23 helper.c
--- target-arm/helper.c	14 Oct 2007 07:07:05 -0000	1.23
+++ target-arm/helper.c	14 Oct 2007 11:36:53 -0000
@@ -297,7 +297,7 @@ void do_interrupt(CPUARMState *env)
             if (env->thumb) {
                 mask = lduw_code(env->regs[15] - 2) & 0xff;
             } else {
-                mask = ldl_code(env->regs[15] - 4) & 0xffffff;
+                mask = ldul_code(env->regs[15] - 4) & 0xffffff;
             }
             /* Only intercept calls from privileged modes, to provide some
                semblance of security.  */
@@ -473,7 +473,7 @@ static int get_phys_addr(CPUState *env, 
         /* Pagetable walk.  */
         /* Lookup l1 descriptor.  */
         table = (env->cp15.c2_base & 0xffffc000) | ((address >> 18) & 0x3ffc);
-        desc = ldl_phys(table);
+        desc = ldul_phys(table);
         type = (desc & 3);
         domain = (env->cp15.c3 >> ((desc >> 4) & 0x1e)) & 3;
         if (type == 0) {
@@ -502,7 +502,7 @@ static int get_phys_addr(CPUState *env, 
                 /* Fine pagetable.  */
                 table = (desc & 0xfffff000) | ((address >> 8) & 0xffc);
             }
-            desc = ldl_phys(table);
+            desc = ldul_phys(table);
             switch (desc & 3) {
             case 0: /* Page translation fault.  */
                 code = 7;
Index: target-arm/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-arm/op_helper.c,v
retrieving revision 1.7
diff -u -d -d -p -r1.7 op_helper.c
--- target-arm/op_helper.c	14 Oct 2007 07:07:05 -0000	1.7
+++ target-arm/op_helper.c	14 Oct 2007 11:36:53 -0000
@@ -180,6 +180,21 @@ void do_vfp_get_fpscr(void)
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -191,6 +206,7 @@ void do_vfp_get_fpscr(void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-arm/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-arm/op_mem.h,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 op_mem.h
--- target-arm/op_mem.h	30 Apr 2007 02:02:16 -0000	1.2
+++ target-arm/op_mem.h	14 Oct 2007 11:36:53 -0000
@@ -1,18 +1,17 @@
 /* ARM memory operations.  */
 
-/* Load from address T1 into T0.  */
-#define MEM_LD_OP(name) \
+#define MEM_LD_OP(name, lname) \
 void OPPROTO glue(op_ld##name,MEMSUFFIX)(void) \
 { \
-    T0 = glue(ld##name,MEMSUFFIX)(T1); \
+    T0 = glue(ld##lname,MEMSUFFIX)(T1); \
     FORCE_RET(); \
 }
 
-MEM_LD_OP(ub)
-MEM_LD_OP(sb)
-MEM_LD_OP(uw)
-MEM_LD_OP(sw)
-MEM_LD_OP(l)
+MEM_LD_OP(ub,ub)
+MEM_LD_OP(sb,sb)
+MEM_LD_OP(uw,uw)
+MEM_LD_OP(sw,sw)
+MEM_LD_OP(l,ul)
 
 #undef MEM_LD_OP
 
@@ -45,7 +44,7 @@ void OPPROTO glue(op_swp##name,MEMSUFFIX
 }
 
 MEM_SWP_OP(b, ub)
-MEM_SWP_OP(l, l)
+MEM_SWP_OP(l, ul)
 
 #undef MEM_SWP_OP
 
@@ -82,7 +81,7 @@ void OPPROTO glue(op_iwmmxt_st##name,MEM
 
 MMX_MEM_OP(b, ub)
 MMX_MEM_OP(w, uw)
-MMX_MEM_OP(l, l)
+MMX_MEM_OP(l, ul)
 MMX_MEM_OP(q, q)
 
 #undef MMX_MEM_OP
Index: target-arm/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-arm/translate.c,v
retrieving revision 1.57
diff -u -d -d -p -r1.57 translate.c
--- target-arm/translate.c	17 Sep 2007 08:09:51 -0000	1.57
+++ target-arm/translate.c	14 Oct 2007 11:36:53 -0000
@@ -2206,7 +2206,7 @@ static void disas_arm_insn(CPUState * en
 {
     unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
 
-    insn = ldl_code(s->pc);
+    insn = ldul_code(s->pc);
     s->pc += 4;
 
     cond = insn >> 28;
Index: target-cris/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-cris/exec.h,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 exec.h
--- target-cris/exec.h	14 Oct 2007 07:07:06 -0000	1.2
+++ target-cris/exec.h	14 Oct 2007 11:36:53 -0000
@@ -50,6 +50,9 @@ void tlb_fill (target_ulong addr, int is
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 void cpu_cris_flush_flags(CPUCRISState *env, int cc_op);
Index: target-cris/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-cris/helper.c,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 helper.c
--- target-cris/helper.c	14 Oct 2007 07:07:06 -0000	1.2
+++ target-cris/helper.c	14 Oct 2007 11:36:53 -0000
@@ -106,7 +106,7 @@ void do_interrupt(CPUState *env)
 //			printf ("BREAK! %d\n", env->trapnr);
 			irqnum = env->trapnr;
 			ebp = env->pregs[SR_EBP];
-			isr = ldl_code(ebp + irqnum * 4);
+			isr = ldul_code(ebp + irqnum * 4);
 			env->pregs[SR_ERP] = env->pc + 2;
 			env->pc = isr;
 
@@ -117,7 +117,7 @@ void do_interrupt(CPUState *env)
 //			printf ("MMU miss\n");
 			irqnum = 4;
 			ebp = env->pregs[SR_EBP];
-			isr = ldl_code(ebp + irqnum * 4);
+			isr = ldul_code(ebp + irqnum * 4);
 			env->pregs[SR_ERP] = env->pc;
 			env->pc = isr;
 			cris_shift_ccs(env);
@@ -138,7 +138,7 @@ void do_interrupt(CPUState *env)
 					__builtin_clz(env->pending_interrupts);
 				irqnum += 0x30;
 				ebp = env->pregs[SR_EBP];
-				isr = ldl_code(ebp + irqnum * 4);
+				isr = ldul_code(ebp + irqnum * 4);
 				env->pregs[SR_ERP] = env->pc;
 				env->pc = isr;
 
Index: target-cris/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-cris/op_helper.c,v
retrieving revision 1.2
diff -u -d -d -p -r1.2 op_helper.c
--- target-cris/op_helper.c	14 Oct 2007 07:07:06 -0000	1.2
+++ target-cris/op_helper.c	14 Oct 2007 11:36:53 -0000
@@ -25,6 +25,21 @@
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -36,6 +51,7 @@
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-cris/op_mem.c
===================================================================
RCS file: /sources/qemu/qemu/target-cris/op_mem.c,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 op_mem.c
--- target-cris/op_mem.c	8 Oct 2007 13:04:02 -0000	1.1
+++ target-cris/op_mem.c	14 Oct 2007 11:36:53 -0000
@@ -49,7 +49,7 @@ void glue(op_stw_T0_T1, MEMSUFFIX) (void
 }
 
 void glue(op_ldl_T0_T0, MEMSUFFIX) (void) {
-    T0 = glue(ldl, MEMSUFFIX) (T0);
+    T0 = glue(ldul, MEMSUFFIX) (T0);
     RETURN();
 }
 
Index: target-cris/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-cris/translate.c,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 translate.c
--- target-cris/translate.c	8 Oct 2007 12:49:08 -0000	1.1
+++ target-cris/translate.c	14 Oct 2007 11:36:53 -0000
@@ -828,7 +828,7 @@ static int dec_prep_alu_m(DisasContext *
 		if (memsize == 1)
 			insn_len++;
 
-		imm = ldl_code(dc->pc + 2);
+		imm = ldul_code(dc->pc + 2);
 		if (memsize != 4) {
 			if (s_ext) {
 				imm = sign_extend(imm, (memsize * 8) - 1);
@@ -1962,7 +1962,7 @@ static unsigned int dec_lapc_im(DisasCon
 	rd = dc->op2;
 
 	cris_cc_mask(dc, 0);
-	imm = ldl_code(dc->pc + 2);
+	imm = ldul_code(dc->pc + 2);
 	DIS(fprintf (logfile, "lapc 0x%x, $r%u\n", imm + dc->pc, dc->op2));
 	gen_op_movl_T0_im (dc->pc + imm);
 	gen_movl_reg_T0[rd] ();
@@ -1999,7 +1999,7 @@ static unsigned int dec_jas_im(DisasCont
 {
 	uint32_t imm;
 
-	imm = ldl_code(dc->pc + 2);
+	imm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "jas 0x%x\n", imm));
 	cris_cc_mask(dc, 0);
@@ -2016,7 +2016,7 @@ static unsigned int dec_jasc_im(DisasCon
 {
 	uint32_t imm;
 
-	imm = ldl_code(dc->pc + 2);
+	imm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "jasc 0x%x\n", imm));
 	cris_cc_mask(dc, 0);
@@ -2047,7 +2047,7 @@ static unsigned int dec_bcc_im(DisasCont
 	int32_t offset;
 	uint32_t cond = dc->op2;
 
-	offset = ldl_code(dc->pc + 2);
+	offset = ldul_code(dc->pc + 2);
 	offset = sign_extend(offset, 15);
 
 	DIS(fprintf (logfile, "b%s %d pc=%x dst=%x\n",
@@ -2065,7 +2065,7 @@ static unsigned int dec_bas_im(DisasCont
 	int32_t simm;
 
 
-	simm = ldl_code(dc->pc + 2);
+	simm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "bas 0x%x, $p%u\n", dc->pc + simm, dc->op2));
 	cris_cc_mask(dc, 0);
@@ -2081,7 +2081,7 @@ static unsigned int dec_bas_im(DisasCont
 static unsigned int dec_basc_im(DisasContext *dc)
 {
 	int32_t simm;
-	simm = ldl_code(dc->pc + 2);
+	simm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "basc 0x%x, $p%u\n", dc->pc + simm, dc->op2));
 	cris_cc_mask(dc, 0);
@@ -2259,7 +2259,7 @@ cris_decoder(DisasContext *dc)
 	int i;
 
 	/* Load a halfword onto the instruction register.  */
-	tmp = ldl_code(dc->pc);
+	tmp = ldul_code(dc->pc);
 	dc->ir = tmp & 0xffff;
 
 	/* Now decode it.  */
Index: target-i386/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-i386/exec.h,v
retrieving revision 1.38
diff -u -d -d -p -r1.38 exec.h
--- target-i386/exec.h	14 Oct 2007 07:07:06 -0000	1.38
+++ target-i386/exec.h	14 Oct 2007 11:36:53 -0000
@@ -217,6 +217,9 @@ void check_iol_DX(void);
 #if !defined(CONFIG_USER_ONLY)
 
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 
 static inline double ldfq(target_ulong ptr)
 {
@@ -244,7 +247,7 @@ static inline float ldfl(target_ulong pt
         float f;
         uint32_t i;
     } u;
-    u.i = ldl(ptr);
+    u.i = ldul(ptr);
     return u.f;
 }
 
@@ -419,12 +422,12 @@ static inline void helper_fstt(CPU86_LDo
 
 static inline CPU86_LDouble helper_fldt(target_ulong ptr)
 {
-    return *(CPU86_LDouble *)ptr;
+    return *(CPU86_LDouble *)(unsigned long)ptr;
 }
 
 static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr)
 {
-    *(CPU86_LDouble *)ptr = f;
+    *(CPU86_LDouble *)(unsigned long)ptr = f;
 }
 
 #else
Index: target-i386/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/helper.c,v
retrieving revision 1.90
diff -u -d -d -p -r1.90 helper.c
--- target-i386/helper.c	14 Oct 2007 07:07:06 -0000	1.90
+++ target-i386/helper.c	14 Oct 2007 11:36:54 -0000
@@ -122,8 +122,8 @@ static inline int load_segment(uint32_t 
     if ((index + 7) > dt->limit)
         return -1;
     ptr = dt->base + index;
-    *e1_ptr = ldl_kernel(ptr);
-    *e2_ptr = ldl_kernel(ptr + 4);
+    *e1_ptr = ldul_kernel(ptr);
+    *e2_ptr = ldul_kernel(ptr + 4);
     return 0;
 }
 
@@ -186,7 +186,7 @@ static inline void get_ss_esp_from_tss(u
         *esp_ptr = lduw_kernel(env->tr.base + index);
         *ss_ptr = lduw_kernel(env->tr.base + index + 2);
     } else {
-        *esp_ptr = ldl_kernel(env->tr.base + index);
+        *esp_ptr = ldul_kernel(env->tr.base + index);
         *ss_ptr = lduw_kernel(env->tr.base + index + 4);
     }
 }
@@ -302,15 +302,15 @@ static void switch_tss(int tss_selector,
     /* read all the registers from the new TSS */
     if (type & 8) {
         /* 32 bit */
-        new_cr3 = ldl_kernel(tss_base + 0x1c);
-        new_eip = ldl_kernel(tss_base + 0x20);
-        new_eflags = ldl_kernel(tss_base + 0x24);
+        new_cr3 = ldul_kernel(tss_base + 0x1c);
+        new_eip = ldul_kernel(tss_base + 0x20);
+        new_eflags = ldul_kernel(tss_base + 0x24);
         for(i = 0; i < 8; i++)
-            new_regs[i] = ldl_kernel(tss_base + (0x28 + i * 4));
+            new_regs[i] = ldul_kernel(tss_base + (0x28 + i * 4));
         for(i = 0; i < 6; i++)
             new_segs[i] = lduw_kernel(tss_base + (0x48 + i * 4));
         new_ldt = lduw_kernel(tss_base + 0x60);
-        new_trap = ldl_kernel(tss_base + 0x64);
+        new_trap = ldul_kernel(tss_base + 0x64);
     } else {
         /* 16 bit */
         new_cr3 = 0;
@@ -341,7 +341,7 @@ static void switch_tss(int tss_selector,
         target_ulong ptr;
         uint32_t e2;
         ptr = env->gdt.base + (env->tr.selector & ~7);
-        e2 = ldl_kernel(ptr + 4);
+        e2 = ldul_kernel(ptr + 4);
         e2 &= ~DESC_TSS_BUSY_MASK;
         stl_kernel(ptr + 4, e2);
     }
@@ -393,7 +393,7 @@ static void switch_tss(int tss_selector,
         target_ulong ptr;
         uint32_t e2;
         ptr = env->gdt.base + (tss_selector & ~7);
-        e2 = ldl_kernel(ptr + 4);
+        e2 = ldul_kernel(ptr + 4);
         e2 |= DESC_TSS_BUSY_MASK;
         stl_kernel(ptr + 4, e2);
     }
@@ -456,8 +456,8 @@ static void switch_tss(int tss_selector,
         if ((index + 7) > dt->limit)
             raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
         if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
             raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
         if (!(e2 & DESC_P_MASK))
@@ -580,7 +580,7 @@ do {\
 
 #define POPL(ssp, sp, sp_mask, val)\
 {\
-    val = (uint32_t)ldl_kernel((ssp) + (sp & (sp_mask)));\
+    val = (uint32_t)ldul_kernel((ssp) + (sp & (sp_mask)));\
     sp += 4;\
 }
 
@@ -629,8 +629,8 @@ static void do_interrupt_protected(int i
     if (intno * 8 + 7 > dt->limit)
         raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
     ptr = dt->base + intno * 8;
-    e1 = ldl_kernel(ptr);
-    e2 = ldl_kernel(ptr + 4);
+    e1 = ldul_kernel(ptr);
+    e2 = ldul_kernel(ptr + 4);
     /* check gate type */
     type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
     switch(type) {
@@ -875,9 +875,9 @@ static void do_interrupt64(int intno, in
     if (intno * 16 + 15 > dt->limit)
         raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
     ptr = dt->base + intno * 16;
-    e1 = ldl_kernel(ptr);
-    e2 = ldl_kernel(ptr + 4);
-    e3 = ldl_kernel(ptr + 8);
+    e1 = ldul_kernel(ptr);
+    e2 = ldul_kernel(ptr + 4);
+    e3 = ldul_kernel(ptr + 8);
     /* check gate type */
     type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
     switch(type) {
@@ -1147,7 +1147,7 @@ void do_interrupt_user(int intno, int is
 
     dt = &env->idt;
     ptr = dt->base + (intno * 8);
-    e2 = ldl_kernel(ptr + 4);
+    e2 = ldul_kernel(ptr + 4);
 
     dpl = (e2 >> DESC_DPL_SHIFT) & 3;
     cpl = env->hflags & HF_CPL_MASK;
@@ -1469,24 +1469,24 @@ void helper_rsm(void)
         cpu_x86_load_seg_cache(env, i,
                                lduw_phys(sm_state + offset),
                                ldq_phys(sm_state + offset + 8),
-                               ldl_phys(sm_state + offset + 4),
+                               ldul_phys(sm_state + offset + 4),
                                (lduw_phys(sm_state + offset + 2) & 0xf0ff) << 8);
     }
 
     env->gdt.base = ldq_phys(sm_state + 0x7e68);
-    env->gdt.limit = ldl_phys(sm_state + 0x7e64);
+    env->gdt.limit = ldul_phys(sm_state + 0x7e64);
 
     env->ldt.selector = lduw_phys(sm_state + 0x7e70);
     env->ldt.base = ldq_phys(sm_state + 0x7e78);
-    env->ldt.limit = ldl_phys(sm_state + 0x7e74);
+    env->ldt.limit = ldul_phys(sm_state + 0x7e74);
     env->ldt.flags = (lduw_phys(sm_state + 0x7e72) & 0xf0ff) << 8;
 
     env->idt.base = ldq_phys(sm_state + 0x7e88);
-    env->idt.limit = ldl_phys(sm_state + 0x7e84);
+    env->idt.limit = ldul_phys(sm_state + 0x7e84);
 
     env->tr.selector = lduw_phys(sm_state + 0x7e90);
     env->tr.base = ldq_phys(sm_state + 0x7e98);
-    env->tr.limit = ldl_phys(sm_state + 0x7e94);
+    env->tr.limit = ldul_phys(sm_state + 0x7e94);
     env->tr.flags = (lduw_phys(sm_state + 0x7e92) & 0xf0ff) << 8;
 
     EAX = ldq_phys(sm_state + 0x7ff8);
@@ -1500,51 +1500,51 @@ void helper_rsm(void)
     for(i = 8; i < 16; i++)
         env->regs[i] = ldq_phys(sm_state + 0x7ff8 - i * 8);
     env->eip = ldq_phys(sm_state + 0x7f78);
-    load_eflags(ldl_phys(sm_state + 0x7f70),
+    load_eflags(ldul_phys(sm_state + 0x7f70),
                 ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    env->dr[6] = ldl_phys(sm_state + 0x7f68);
-    env->dr[7] = ldl_phys(sm_state + 0x7f60);
+    env->dr[6] = ldul_phys(sm_state + 0x7f68);
+    env->dr[7] = ldul_phys(sm_state + 0x7f60);
 
-    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f48));
-    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7f50));
-    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7f58));
+    cpu_x86_update_cr4(env, ldul_phys(sm_state + 0x7f48));
+    cpu_x86_update_cr3(env, ldul_phys(sm_state + 0x7f50));
+    cpu_x86_update_cr0(env, ldul_phys(sm_state + 0x7f58));
 
-    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+    val = ldul_phys(sm_state + 0x7efc); /* revision ID */
     if (val & 0x20000) {
-        env->smbase = ldl_phys(sm_state + 0x7f00) & ~0x7fff;
+        env->smbase = ldul_phys(sm_state + 0x7f00) & ~0x7fff;
     }
 #else
-    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7ffc));
-    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7ff8));
-    load_eflags(ldl_phys(sm_state + 0x7ff4),
+    cpu_x86_update_cr0(env, ldul_phys(sm_state + 0x7ffc));
+    cpu_x86_update_cr3(env, ldul_phys(sm_state + 0x7ff8));
+    load_eflags(ldul_phys(sm_state + 0x7ff4),
                 ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    env->eip = ldl_phys(sm_state + 0x7ff0);
-    EDI = ldl_phys(sm_state + 0x7fec);
-    ESI = ldl_phys(sm_state + 0x7fe8);
-    EBP = ldl_phys(sm_state + 0x7fe4);
-    ESP = ldl_phys(sm_state + 0x7fe0);
-    EBX = ldl_phys(sm_state + 0x7fdc);
-    EDX = ldl_phys(sm_state + 0x7fd8);
-    ECX = ldl_phys(sm_state + 0x7fd4);
-    EAX = ldl_phys(sm_state + 0x7fd0);
-    env->dr[6] = ldl_phys(sm_state + 0x7fcc);
-    env->dr[7] = ldl_phys(sm_state + 0x7fc8);
+    env->eip = ldul_phys(sm_state + 0x7ff0);
+    EDI = ldul_phys(sm_state + 0x7fec);
+    ESI = ldul_phys(sm_state + 0x7fe8);
+    EBP = ldul_phys(sm_state + 0x7fe4);
+    ESP = ldul_phys(sm_state + 0x7fe0);
+    EBX = ldul_phys(sm_state + 0x7fdc);
+    EDX = ldul_phys(sm_state + 0x7fd8);
+    ECX = ldul_phys(sm_state + 0x7fd4);
+    EAX = ldul_phys(sm_state + 0x7fd0);
+    env->dr[6] = ldul_phys(sm_state + 0x7fcc);
+    env->dr[7] = ldul_phys(sm_state + 0x7fc8);
 
-    env->tr.selector = ldl_phys(sm_state + 0x7fc4) & 0xffff;
-    env->tr.base = ldl_phys(sm_state + 0x7f64);
-    env->tr.limit = ldl_phys(sm_state + 0x7f60);
-    env->tr.flags = (ldl_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
+    env->tr.selector = ldul_phys(sm_state + 0x7fc4) & 0xffff;
+    env->tr.base = ldul_phys(sm_state + 0x7f64);
+    env->tr.limit = ldul_phys(sm_state + 0x7f60);
+    env->tr.flags = (ldul_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
 
-    env->ldt.selector = ldl_phys(sm_state + 0x7fc0) & 0xffff;
-    env->ldt.base = ldl_phys(sm_state + 0x7f80);
-    env->ldt.limit = ldl_phys(sm_state + 0x7f7c);
-    env->ldt.flags = (ldl_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
+    env->ldt.selector = ldul_phys(sm_state + 0x7fc0) & 0xffff;
+    env->ldt.base = ldul_phys(sm_state + 0x7f80);
+    env->ldt.limit = ldul_phys(sm_state + 0x7f7c);
+    env->ldt.flags = (ldul_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
 
-    env->gdt.base = ldl_phys(sm_state + 0x7f74);
-    env->gdt.limit = ldl_phys(sm_state + 0x7f70);
+    env->gdt.base = ldul_phys(sm_state + 0x7f74);
+    env->gdt.limit = ldul_phys(sm_state + 0x7f70);
 
-    env->idt.base = ldl_phys(sm_state + 0x7f58);
-    env->idt.limit = ldl_phys(sm_state + 0x7f54);
+    env->idt.base = ldul_phys(sm_state + 0x7f58);
+    env->idt.limit = ldul_phys(sm_state + 0x7f54);
 
     for(i = 0; i < 6; i++) {
         if (i < 3)
@@ -1552,16 +1552,16 @@ void helper_rsm(void)
         else
             offset = 0x7f2c + (i - 3) * 12;
         cpu_x86_load_seg_cache(env, i,
-                               ldl_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
-                               ldl_phys(sm_state + offset + 8),
-                               ldl_phys(sm_state + offset + 4),
-                               (ldl_phys(sm_state + offset) & 0xf0ff) << 8);
+                               ldul_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
+                               ldul_phys(sm_state + offset + 8),
+                               ldul_phys(sm_state + offset + 4),
+                               (ldul_phys(sm_state + offset) & 0xf0ff) << 8);
     }
-    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f14));
+    cpu_x86_update_cr4(env, ldul_phys(sm_state + 0x7f14));
 
-    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+    val = ldul_phys(sm_state + 0x7efc); /* revision ID */
     if (val & 0x20000) {
-        env->smbase = ldl_phys(sm_state + 0x7ef8) & ~0x7fff;
+        env->smbase = ldul_phys(sm_state + 0x7ef8) & ~0x7fff;
     }
 #endif
     CC_OP = CC_OP_EFLAGS;
@@ -1761,7 +1761,7 @@ void helper_enter_level(int level, int d
         while (--level) {
             esp -= 4;
             ebp -= 4;
-            stl(ssp + (esp & esp_mask), ldl(ssp + (ebp & esp_mask)));
+            stl(ssp + (esp & esp_mask), ldul(ssp + (ebp & esp_mask)));
         }
         esp -= 4;
         stl(ssp + (esp & esp_mask), T1);
@@ -1836,8 +1836,8 @@ void helper_lldt_T0(void)
         if ((index + entry_limit) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
         if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         if (!(e2 & DESC_P_MASK))
@@ -1845,7 +1845,7 @@ void helper_lldt_T0(void)
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
             uint32_t e3;
-            e3 = ldl_kernel(ptr + 8);
+            e3 = ldul_kernel(ptr + 8);
             load_seg_cache_raw_dt(&env->ldt, e1, e2);
             env->ldt.base |= (target_ulong)e3 << 32;
         } else
@@ -1885,8 +1885,8 @@ void helper_ltr_T0(void)
         if ((index + entry_limit) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
         type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
         if ((e2 & DESC_S_MASK) ||
             (type != 1 && type != 9))
@@ -1896,8 +1896,8 @@ void helper_ltr_T0(void)
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
             uint32_t e3, e4;
-            e3 = ldl_kernel(ptr + 8);
-            e4 = ldl_kernel(ptr + 12);
+            e3 = ldul_kernel(ptr + 8);
+            e4 = ldul_kernel(ptr + 12);
             if ((e4 >> DESC_TYPE_SHIFT) & 0xf)
                 raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
             load_seg_cache_raw_dt(&env->tr, e1, e2);
@@ -1943,8 +1943,8 @@ void load_seg(int seg_reg, int selector)
         if ((index + 7) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
 
         if (!(e2 & DESC_S_MASK))
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
@@ -2273,7 +2273,7 @@ void helper_lcall_protected_T0_T1(int sh
                 PUSHL(ssp, sp, sp_mask, env->segs[R_SS].selector);
                 PUSHL(ssp, sp, sp_mask, ESP);
                 for(i = param_count - 1; i >= 0; i--) {
-                    val = ldl_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask));
+                    val = ldul_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask));
                     PUSHL(ssp, sp, sp_mask, val);
                 }
             } else {
@@ -3569,8 +3569,8 @@ void helper_fxrstor(target_ulong ptr, in
 
     if (env->cr[4] & CR4_OSFXSR_MASK) {
         /* XXX: finish it */
-        env->mxcsr = ldl(ptr + 0x18);
-        //ldl(ptr + 0x1c);
+        env->mxcsr = ldul(ptr + 0x18);
+        //ldul(ptr + 0x1c);
         nb_xmm_regs = 8 << data64;
         addr = ptr + 0xa0;
         for(i = 0; i < nb_xmm_regs; i++) {
@@ -3867,6 +3867,7 @@ void update_fp_status(void)
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -3879,6 +3880,21 @@ void update_fp_status(void)
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 #endif
 
 /* try to fill the TLB and return an exception if error. If retaddr is
@@ -4010,13 +4026,13 @@ void helper_vmrun(target_ulong addr)
     env->intercept_cr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_write));
     env->intercept_dr_read    = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_read));
     env->intercept_dr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_write));
-    env->intercept_exceptions = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions));
+    env->intercept_exceptions = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions));
 
     env->gdt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base));
-    env->gdt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit));
+    env->gdt.limit = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit));
 
     env->idt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base));
-    env->idt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit));
+    env->idt.limit = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit));
 
     /* clear exit_info_2 so we behave like the real hardware */
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 0);
@@ -4025,7 +4041,7 @@ void helper_vmrun(target_ulong addr)
     cpu_x86_update_cr4(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4)));
     cpu_x86_update_cr3(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3)));
     env->cr[2] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2));
-    int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
+    int_ctl = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
     if (int_ctl & V_INTR_MASKING_MASK) {
         env->cr[8] = int_ctl & V_TPR_MASK;
         if (env->eflags & IF_MASK)
@@ -4073,11 +4089,11 @@ void helper_vmrun(target_ulong addr)
     regs_to_env();
 
     /* maybe we need to inject an event */
-    event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+    event_inj = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
     if (event_inj & SVM_EVTINJ_VALID) {
         uint8_t vector = event_inj & SVM_EVTINJ_VEC_MASK;
         uint16_t valid_err = event_inj & SVM_EVTINJ_VALID_ERR;
-        uint32_t event_inj_err = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
+        uint32_t event_inj_err = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
         stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
 
         if (loglevel & CPU_LOG_TB_IN_ASM)
@@ -4309,7 +4325,7 @@ void vmexit(uint64_t exit_code, uint64_t
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3), env->cr[3]);
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4), env->cr[4]);
 
-    if ((int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl))) & V_INTR_MASKING_MASK) {
+    if ((int_ctl = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl))) & V_INTR_MASKING_MASK) {
         int_ctl &= ~V_TPR_MASK;
         int_ctl |= env->cr[8] & V_TPR_MASK;
         stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl), int_ctl);
@@ -4330,10 +4346,10 @@ void vmexit(uint64_t exit_code, uint64_t
     env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
 
     env->gdt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base));
-    env->gdt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit));
+    env->gdt.limit = ldul_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit));
 
     env->idt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base));
-    env->idt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit));
+    env->idt.limit = ldul_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit));
 
     cpu_x86_update_cr0(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0)) | CR0_PE_MASK);
     cpu_x86_update_cr4(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4)));
Index: target-i386/helper2.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/helper2.c,v
retrieving revision 1.53
diff -u -d -d -p -r1.53 helper2.c
--- target-i386/helper2.c	14 Oct 2007 07:07:06 -0000	1.53
+++ target-i386/helper2.c	14 Oct 2007 11:36:54 -0000
@@ -772,7 +772,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
         /* page directory entry */
         pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) &
             env->a20_mask;
-        pde = ldl_phys(pde_addr);
+        pde = ldul_phys(pde_addr);
         if (!(pde & PG_PRESENT_MASK)) {
             error_code = 0;
             goto do_fault;
@@ -810,7 +810,7 @@ int cpu_x86_handle_mmu_fault(CPUX86State
             /* page directory entry */
             pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
                 env->a20_mask;
-            pte = ldl_phys(pte_addr);
+            pte = ldul_phys(pte_addr);
             if (!(pte & PG_PRESENT_MASK)) {
                 error_code = 0;
                 goto do_fault;
@@ -910,13 +910,13 @@ target_phys_addr_t cpu_get_phys_page_deb
 
             pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
                 env->a20_mask;
-            pml4e = ldl_phys(pml4e_addr);
+            pml4e = ldul_phys(pml4e_addr);
             if (!(pml4e & PG_PRESENT_MASK))
                 return -1;
 
             pdpe_addr = ((pml4e & ~0xfff) + (((addr >> 30) & 0x1ff) << 3)) &
                 env->a20_mask;
-            pdpe = ldl_phys(pdpe_addr);
+            pdpe = ldul_phys(pdpe_addr);
             if (!(pdpe & PG_PRESENT_MASK))
                 return -1;
         } else
@@ -924,14 +924,14 @@ target_phys_addr_t cpu_get_phys_page_deb
         {
             pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) &
                 env->a20_mask;
-            pdpe = ldl_phys(pdpe_addr);
+            pdpe = ldul_phys(pdpe_addr);
             if (!(pdpe & PG_PRESENT_MASK))
                 return -1;
         }
 
         pde_addr = ((pdpe & ~0xfff) + (((addr >> 21) & 0x1ff) << 3)) &
             env->a20_mask;
-        pde = ldl_phys(pde_addr);
+        pde = ldul_phys(pde_addr);
         if (!(pde & PG_PRESENT_MASK)) {
             return -1;
         }
@@ -944,7 +944,7 @@ target_phys_addr_t cpu_get_phys_page_deb
             pte_addr = ((pde & ~0xfff) + (((addr >> 12) & 0x1ff) << 3)) &
                 env->a20_mask;
             page_size = 4096;
-            pte = ldl_phys(pte_addr);
+            pte = ldul_phys(pte_addr);
         }
     } else {
         if (!(env->cr[0] & CR0_PG_MASK)) {
@@ -953,7 +953,7 @@ target_phys_addr_t cpu_get_phys_page_deb
         } else {
             /* page directory entry */
             pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask;
-            pde = ldl_phys(pde_addr);
+            pde = ldul_phys(pde_addr);
             if (!(pde & PG_PRESENT_MASK))
                 return -1;
             if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
@@ -962,7 +962,7 @@ target_phys_addr_t cpu_get_phys_page_deb
             } else {
                 /* page directory entry */
                 pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask;
-                pte = ldl_phys(pte_addr);
+                pte = ldul_phys(pte_addr);
                 if (!(pte & PG_PRESENT_MASK))
                     return -1;
                 page_size = 4096;
Index: target-i386/op.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/op.c,v
retrieving revision 1.51
diff -u -d -d -p -r1.51 op.c
--- target-i386/op.c	23 Sep 2007 15:28:04 -0000	1.51
+++ target-i386/op.c	14 Oct 2007 11:36:54 -0000
@@ -716,8 +716,8 @@ void OPPROTO op_boundw(void)
 void OPPROTO op_boundl(void)
 {
     int low, high, v;
-    low = ldl(A0);
-    high = ldl(A0 + 4);
+    low = ldul(A0);
+    high = ldul(A0 + 4);
     v = T0;
     if (v < low || v > high) {
         raise_exception(EXCP05_BOUND);
@@ -747,8 +747,6 @@ void OPPROTO op_exit_tb(void)
 
 /* multiple size ops */
 
-#define ldul ldl
-
 #define SHIFT 0
 #include "ops_template.h"
 #undef SHIFT
@@ -1688,7 +1686,7 @@ CCTable cc_table[CC_OP_NB] = {
 void OPPROTO op_flds_FT0_A0(void)
 {
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = ldl(A0);
+    FP_CONVERT.i32 = ldul(A0);
     FT0 = FP_CONVERT.f;
 #else
     FT0 = ldfl(A0);
@@ -1715,7 +1713,7 @@ void helper_fild_FT0_A0(void)
 
 void helper_fildl_FT0_A0(void)
 {
-    FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
+    FT0 = (CPU86_LDouble)((int32_t)ldul(A0));
 }
 
 void helper_fildll_FT0_A0(void)
@@ -1753,10 +1751,10 @@ void OPPROTO op_fild_FT0_A0(void)
 void OPPROTO op_fildl_FT0_A0(void)
 {
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = (int32_t) ldl(A0);
+    FP_CONVERT.i32 = (int32_t) ldul(A0);
     FT0 = (CPU86_LDouble)FP_CONVERT.i32;
 #else
-    FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
+    FT0 = (CPU86_LDouble)((int32_t)ldul(A0));
 #endif
 }
 
@@ -1778,7 +1776,7 @@ void OPPROTO op_flds_ST0_A0(void)
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = ldl(A0);
+    FP_CONVERT.i32 = ldul(A0);
     env->fpregs[new_fpstt].d = FP_CONVERT.f;
 #else
     env->fpregs[new_fpstt].d = ldfl(A0);
@@ -1822,7 +1820,7 @@ void helper_fildl_ST0_A0(void)
 {
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
-    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0));
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldul(A0));
     env->fpstt = new_fpstt;
     env->fptags[new_fpstt] = 0; /* validate stack entry */
 }
@@ -1872,10 +1870,10 @@ void OPPROTO op_fildl_ST0_A0(void)
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = (int32_t) ldl(A0);
+    FP_CONVERT.i32 = (int32_t) ldul(A0);
     env->fpregs[new_fpstt].d = (CPU86_LDouble)FP_CONVERT.i32;
 #else
-    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0));
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldul(A0));
 #endif
     env->fpstt = new_fpstt;
     env->fptags[new_fpstt] = 0; /* validate stack entry */
Index: target-i386/ops_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-i386/ops_mem.h,v
retrieving revision 1.7
diff -u -d -d -p -r1.7 ops_mem.h
--- target-i386/ops_mem.h	28 Nov 2005 21:02:17 -0000	1.7
+++ target-i386/ops_mem.h	14 Oct 2007 11:36:54 -0000
@@ -20,7 +20,7 @@ void OPPROTO glue(glue(op_ldsw, MEMSUFFI
 
 void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T0_A0)(void)
 {
-    T0 = (uint32_t)glue(ldl, MEMSUFFIX)(A0);
+    T0 = (uint32_t)glue(ldul, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_ldub, MEMSUFFIX), _T1_A0)(void)
@@ -45,7 +45,7 @@ void OPPROTO glue(glue(op_ldsw, MEMSUFFI
 
 void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T1_A0)(void)
 {
-    T1 = (uint32_t)glue(ldl, MEMSUFFIX)(A0);
+    T1 = (uint32_t)glue(ldul, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_stb, MEMSUFFIX), _T0_A0)(void)
@@ -122,12 +122,12 @@ void OPPROTO glue(glue(op_sto, MEMSUFFIX
 #ifdef TARGET_X86_64
 void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T0_A0)(void)
 {
-    T0 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
+    T0 = glue(ldsl, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T1_A0)(void)
 {
-    T1 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
+    T1 = glue(ldsl, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_ldq, MEMSUFFIX), _T0_A0)(void)
Index: target-i386/svm.h
===================================================================
RCS file: /sources/qemu/qemu/target-i386/svm.h,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 svm.h
--- target-i386/svm.h	23 Sep 2007 15:30:28 -0000	1.1
+++ target-i386/svm.h	14 Oct 2007 11:36:54 -0000
@@ -340,13 +340,13 @@ static inline int svm_check_intercept(un
                     R_##seg_index, \
                     lduw_phys(addr + offsetof(struct vmcb, save.seg.selector)),\
                     ldq_phys(addr + offsetof(struct vmcb, save.seg.base)),\
-                    ldl_phys(addr + offsetof(struct vmcb, save.seg.limit)),\
-                    vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg.attrib)), ldq_phys(addr + offsetof(struct vmcb, save.seg.base)), ldl_phys(addr + offsetof(struct vmcb, save.seg.limit))))
+                    ldul_phys(addr + offsetof(struct vmcb, save.seg.limit)),\
+                    vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg.attrib)), ldq_phys(addr + offsetof(struct vmcb, save.seg.base)), ldul_phys(addr + offsetof(struct vmcb, save.seg.limit))))
 
 #define SVM_LOAD_SEG2(addr, seg_qemu, seg_vmcb) \
     env->seg_qemu.selector  = lduw_phys(addr + offsetof(struct vmcb, save.seg_vmcb.selector)); \
     env->seg_qemu.base      = ldq_phys(addr + offsetof(struct vmcb, save.seg_vmcb.base)); \
-    env->seg_qemu.limit     = ldl_phys(addr + offsetof(struct vmcb, save.seg_vmcb.limit)); \
+    env->seg_qemu.limit     = ldul_phys(addr + offsetof(struct vmcb, save.seg_vmcb.limit)); \
     env->seg_qemu.flags     = vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg_vmcb.attrib)), env->seg_qemu.base, env->seg_qemu.limit)
 
 #define SVM_SAVE_SEG(addr, seg_qemu, seg_vmcb) \
Index: target-i386/translate-copy.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/translate-copy.c,v
retrieving revision 1.9
diff -u -d -d -p -r1.9 translate-copy.c
--- target-i386/translate-copy.c	17 Sep 2007 08:09:52 -0000	1.9
+++ target-i386/translate-copy.c	14 Oct 2007 11:36:54 -0000
@@ -207,7 +207,7 @@ static inline void gen_lea_modrm(DisasCo
         case 0:
             if (base == 5) {
                 base = -1;
-                disp = ldl_code(s->pc);
+                disp = ldul_code(s->pc);
                 s->pc += 4;
             } else {
                 disp = 0;
@@ -218,7 +218,7 @@ static inline void gen_lea_modrm(DisasCo
             break;
         default:
         case 2:
-            disp = ldl_code(s->pc);
+            disp = ldul_code(s->pc);
             s->pc += 4;
             break;
         }
@@ -266,7 +266,7 @@ static inline uint32_t insn_get(DisasCon
         break;
     default:
     case OT_LONG:
-        ret = ldl_code(s->pc);
+        ret = ldul_code(s->pc);
         s->pc += 4;
         break;
     }
Index: target-i386/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-i386/translate.c,v
retrieving revision 1.72
diff -u -d -d -p -r1.72 translate.c
--- target-i386/translate.c	27 Sep 2007 01:52:00 -0000	1.72
+++ target-i386/translate.c	14 Oct 2007 11:36:54 -0000
@@ -1462,7 +1462,7 @@ static void gen_lea_modrm(DisasContext *
         case 0:
             if ((base & 7) == 5) {
                 base = -1;
-                disp = (int32_t)ldl_code(s->pc);
+                disp = (int32_t)ldul_code(s->pc);
                 s->pc += 4;
                 if (CODE64(s) && !havesib) {
                     disp += s->pc + s->rip_offset;
@@ -1476,7 +1476,7 @@ static void gen_lea_modrm(DisasContext *
             break;
         default:
         case 2:
-            disp = ldl_code(s->pc);
+            disp = ldul_code(s->pc);
             s->pc += 4;
             break;
         }
@@ -1736,7 +1736,7 @@ static inline uint32_t insn_get(DisasCon
         break;
     default:
     case OT_LONG:
-        ret = ldl_code(s->pc);
+        ret = ldul_code(s->pc);
         s->pc += 4;
         break;
     }
Index: target-m68k/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-m68k/exec.h,v
retrieving revision 1.5
diff -u -d -d -p -r1.5 exec.h
--- target-m68k/exec.h	14 Oct 2007 07:07:06 -0000	1.5
+++ target-m68k/exec.h	14 Oct 2007 11:36:54 -0000
@@ -42,6 +42,9 @@ int cpu_m68k_handle_mmu_fault (CPUState 
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 void cpu_m68k_flush_flags(CPUM68KState *env, int cc_op);
Index: target-m68k/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-m68k/op_helper.c,v
retrieving revision 1.7
diff -u -d -d -p -r1.7 op_helper.c
--- target-m68k/op_helper.c	14 Oct 2007 07:07:06 -0000	1.7
+++ target-m68k/op_helper.c	14 Oct 2007 11:36:54 -0000
@@ -33,6 +33,21 @@ extern int semihosting_enabled;
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -44,6 +59,7 @@ extern int semihosting_enabled;
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
@@ -83,8 +99,8 @@ static void do_rte(void)
     uint32_t fmt;
 
     sp = env->aregs[7];
-    fmt = ldl_kernel(sp);
-    env->pc = ldl_kernel(sp + 4);
+    fmt = ldul_kernel(sp);
+    env->pc = ldul_kernel(sp + 4);
     sp |= (fmt >> 28) & 3;
     env->sr = fmt & 0xffff;
     m68k_switch_sp(env);
@@ -112,7 +128,7 @@ void do_interrupt(int is_hw)
                     && (env->sr & SR_S) != 0
                     && (env->pc & 3) == 0
                     && lduw_code(env->pc - 4) == 0x4e71
-                    && ldl_code(env->pc) == 0x4e7bf000) {
+                    && ldul_code(env->pc) == 0x4e7bf000) {
                 env->pc += 4;
                 do_m68k_semihosting(env, env->dregs[0]);
                 return;
@@ -153,7 +169,7 @@ void do_interrupt(int is_hw)
     stl_kernel(sp, fmt);
     env->aregs[7] = sp;
     /* Jump to vector.  */
-    env->pc = ldl_kernel(env->vbr + vector);
+    env->pc = ldul_kernel(env->vbr + vector);
 }
 
 #endif
Index: target-m68k/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-m68k/op_mem.h,v
retrieving revision 1.1
diff -u -d -d -p -r1.1 op_mem.h
--- target-m68k/op_mem.h	23 May 2007 19:58:11 -0000	1.1
+++ target-m68k/op_mem.h	14 Oct 2007 11:36:54 -0000
@@ -11,7 +11,7 @@ MEM_LD_OP(8u32,ub)
 MEM_LD_OP(8s32,sb)
 MEM_LD_OP(16u32,uw)
 MEM_LD_OP(16s32,sw)
-MEM_LD_OP(32,l)
+MEM_LD_OP(32,ul)
 
 #undef MEM_LD_OP
 
Index: target-mips/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-mips/exec.h,v
retrieving revision 1.39
diff -u -d -d -p -r1.39 exec.h
--- target-mips/exec.h	14 Oct 2007 07:07:07 -0000	1.39
+++ target-mips/exec.h	14 Oct 2007 11:36:55 -0000
@@ -54,6 +54,9 @@ register target_ulong T2 asm(AREG3);
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 #if defined(TARGET_MIPSN32) || defined(TARGET_MIPS64)
Index: target-mips/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-mips/op_helper.c,v
retrieving revision 1.66
diff -u -d -d -p -r1.66 op_helper.c
--- target-mips/op_helper.c	14 Oct 2007 07:07:07 -0000	1.66
+++ target-mips/op_helper.c	14 Oct 2007 11:36:55 -0000
@@ -544,6 +544,21 @@ static void do_unaligned_access (target_
 #define MMUSUFFIX _mmu
 #define ALIGNED_ONLY
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -555,6 +570,7 @@ static void do_unaligned_access (target_
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 static void do_unaligned_access (target_ulong addr, int is_write, int is_user, void *retaddr)
 {
Index: target-mips/op_mem.c
===================================================================
RCS file: /sources/qemu/qemu/target-mips/op_mem.c,v
retrieving revision 1.14
diff -u -d -d -p -r1.14 op_mem.c
--- target-mips/op_mem.c	9 Oct 2007 03:12:08 -0000	1.14
+++ target-mips/op_mem.c	14 Oct 2007 11:36:55 -0000
@@ -57,13 +57,13 @@ void glue(op_sh, MEMSUFFIX) (void)
 
 void glue(op_lw, MEMSUFFIX) (void)
 {
-    T0 = glue(ldl, MEMSUFFIX)(T0);
+    T0 = glue(ldul, MEMSUFFIX)(T0);
     RETURN();
 }
 
 void glue(op_lwu, MEMSUFFIX) (void)
 {
-    T0 = (uint32_t)glue(ldl, MEMSUFFIX)(T0);
+    T0 = (uint32_t)glue(ldul, MEMSUFFIX)(T0);
     RETURN();
 }
 
@@ -167,7 +167,7 @@ void glue(op_swr, MEMSUFFIX) (void)
 void glue(op_ll, MEMSUFFIX) (void)
 {
     T1 = T0;
-    T0 = glue(ldl, MEMSUFFIX)(T0);
+    T0 = glue(ldul, MEMSUFFIX)(T0);
     env->CP0_LLAddr = T1;
     RETURN();
 }
@@ -383,7 +383,7 @@ void glue(op_scd, MEMSUFFIX) (void)
 
 void glue(op_lwc1, MEMSUFFIX) (void)
 {
-    WT0 = glue(ldl, MEMSUFFIX)(T0);
+    WT0 = glue(ldul, MEMSUFFIX)(T0);
     RETURN();
 }
 void glue(op_swc1, MEMSUFFIX) (void)
Index: target-mips/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-mips/translate.c,v
retrieving revision 1.106
diff -u -d -d -p -r1.106 translate.c
--- target-mips/translate.c	9 Oct 2007 03:39:58 -0000	1.106
+++ target-mips/translate.c	14 Oct 2007 11:36:55 -0000
@@ -6544,7 +6544,7 @@ gen_intermediate_code_internal (CPUState
             gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK;
             gen_opc_instr_start[lj] = 1;
         }
-        ctx.opcode = ldl_code(ctx.pc);
+        ctx.opcode = ldul_code(ctx.pc);
         decode_opc(env, &ctx);
         ctx.pc += 4;
 
Index: target-ppc/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/exec.h,v
retrieving revision 1.29
diff -u -d -d -p -r1.29 exec.h
--- target-ppc/exec.h	14 Oct 2007 07:07:07 -0000	1.29
+++ target-ppc/exec.h	14 Oct 2007 11:36:55 -0000
@@ -91,7 +91,12 @@ static always_inline target_ulong rotl64
 #endif
 
 #if !defined(CONFIG_USER_ONLY)
+
+#include "softmmu_exec.h"
+#define REVERSE_ENDIAN
 #include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
+
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 void do_raise_exception_err (uint32_t exception, int error_code);
Index: target-ppc/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/helper.c,v
retrieving revision 1.80
diff -u -d -d -p -r1.80 helper.c
--- target-ppc/helper.c	14 Oct 2007 10:21:20 -0000	1.80
+++ target-ppc/helper.c	14 Oct 2007 11:36:55 -0000
@@ -569,8 +569,8 @@ static always_inline int _find_pte (mmu_
         } else
 #endif
         {
-            pte0 = ldl_phys(base + (i * 8));
-            pte1 =  ldl_phys(base + (i * 8) + 4);
+            pte0 = ldul_phys(base + (i * 8));
+            pte1 =  ldul_phys(base + (i * 8) + 4);
             r = pte32_check(ctx, pte0, pte1, h, rw, type);
 #if defined (DEBUG_MMU)
             if (loglevel != 0) {
@@ -686,7 +686,7 @@ static int slb_lookup (CPUPPCState *env,
     mask = 0x0000000000000000ULL; /* Avoid gcc warning */
     for (n = 0; n < env->slb_nr; n++) {
         tmp64 = ldq_phys(sr_base);
-        tmp = ldl_phys(sr_base + 8);
+        tmp = ldul_phys(sr_base + 8);
 #if defined(DEBUG_SLB)
         if (loglevel != 0) {
             fprintf(logfile, "%s: seg %d " PADDRX " %016" PRIx64 " %08"
@@ -784,7 +784,7 @@ target_ulong ppc_load_slb (CPUPPCState *
     sr_base = env->spr[SPR_ASR];
     sr_base += 12 * slb_nr;
     tmp64 = ldq_phys(sr_base);
-    tmp = ldl_phys(sr_base + 8);
+    tmp = ldul_phys(sr_base + 8);
     if (tmp64 & 0x0000000008000000ULL) {
         /* SLB entry is valid */
         /* Copy SLB bits 62:88 to Rt 37:63 (VSID 23:49) */
@@ -990,10 +990,10 @@ static int get_segment (CPUState *env, m
                         sdr, mask + 0x80);
                 for (curaddr = sdr; curaddr < (sdr + mask + 0x80);
                      curaddr += 16) {
-                    a0 = ldl_phys(curaddr);
-                    a1 = ldl_phys(curaddr + 4);
-                    a2 = ldl_phys(curaddr + 8);
-                    a3 = ldl_phys(curaddr + 12);
+                    a0 = ldul_phys(curaddr);
+                    a1 = ldul_phys(curaddr + 4);
+                    a2 = ldul_phys(curaddr + 8);
+                    a3 = ldul_phys(curaddr + 12);
                     if (a0 != 0 || a1 != 0 || a2 != 0 || a3 != 0) {
                         fprintf(logfile,
                                 PADDRX ": %08x %08x %08x %08x\n",
@@ -2266,7 +2266,7 @@ static always_inline void powerpc_excp (
 #endif
         /* XXX: this is false */
         /* Get rS/rD and rA from faulting opcode */
-        env->spr[SPR_DSISR] |= (ldl_code((env->nip - 4)) & 0x03FF0000) >> 16;
+        env->spr[SPR_DSISR] |= (ldul_code((env->nip - 4)) & 0x03FF0000) >> 16;
         goto store_current;
     case POWERPC_EXCP_PROGRAM:   /* Program exception                        */
         switch (env->error_code & ~0xF) {
Index: target-ppc/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_helper.c,v
retrieving revision 1.51
diff -u -d -d -p -r1.51 op_helper.c
--- target-ppc/op_helper.c	14 Oct 2007 08:27:14 -0000	1.51
+++ target-ppc/op_helper.c	14 Oct 2007 11:36:55 -0000
@@ -2296,6 +2296,21 @@ DO_SPE_OP1(fsctuf);
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -2307,6 +2322,7 @@ DO_SPE_OP1(fsctuf);
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
Index: target-ppc/op_helper.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_helper.h,v
retrieving revision 1.21
diff -u -d -d -p -r1.21 op_helper.h
--- target-ppc/op_helper.h	7 Oct 2007 17:13:44 -0000	1.21
+++ target-ppc/op_helper.h	14 Oct 2007 11:36:55 -0000
@@ -37,19 +37,6 @@ void glue(do_POWER2_lfq_le, MEMSUFFIX) (
 void glue(do_POWER2_stfq, MEMSUFFIX) (void);
 void glue(do_POWER2_stfq_le, MEMSUFFIX) (void);
 
-#if defined(TARGET_PPC64)
-void glue(do_lsw_64, MEMSUFFIX) (int dst);
-void glue(do_lsw_le_64, MEMSUFFIX) (int dst);
-void glue(do_stsw_64, MEMSUFFIX) (int src);
-void glue(do_stsw_le_64, MEMSUFFIX) (int src);
-void glue(do_lmw_64, MEMSUFFIX) (int dst);
-void glue(do_lmw_le_64, MEMSUFFIX) (int dst);
-void glue(do_stmw_64, MEMSUFFIX) (int src);
-void glue(do_stmw_le_64, MEMSUFFIX) (int src);
-void glue(do_icbi_64, MEMSUFFIX) (void);
-void glue(do_dcbz_64, MEMSUFFIX) (void);
-#endif
-
 #else
 
 void do_print_mem_EA (target_ulong EA);
Index: target-ppc/op_helper_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_helper_mem.h,v
retrieving revision 1.14
diff -u -d -d -p -r1.14 op_helper_mem.h
--- target-ppc/op_helper_mem.h	7 Oct 2007 17:13:44 -0000	1.14
+++ target-ppc/op_helper_mem.h	14 Oct 2007 11:36:55 -0000
@@ -19,85 +19,33 @@
  */
 
 /* Multiple word / string load and store */
-static always_inline target_ulong glue(ld32r, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF000000UL) >> 24) | ((tmp & 0x00FF0000UL) >> 8) |
-        ((tmp & 0x0000FF00UL) << 8) | ((tmp & 0x000000FFUL) << 24);
-}
-
-static always_inline void glue(st32r, MEMSUFFIX) (target_ulong EA,
-                                                  target_ulong data)
-{
-    uint32_t tmp =
-        ((data & 0xFF000000UL) >> 24) | ((data & 0x00FF0000UL) >> 8) |
-        ((data & 0x0000FF00UL) << 8) | ((data & 0x000000FFUL) << 24);
-    glue(stl, MEMSUFFIX)(EA, tmp);
-}
-
 void glue(do_lmw, MEMSUFFIX) (int dst)
 {
     for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lmw_64, MEMSUFFIX) (int dst)
-{
-    for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst] = glue(ldul, MEMSUFFIX)(T0);
     }
 }
-#endif
 
 void glue(do_stmw, MEMSUFFIX) (int src)
 {
     for (; src < 32; src++, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint32_t)T0, env->gpr[src]);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stmw_64, MEMSUFFIX) (int src)
-{
-    for (; src < 32; src++, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint64_t)T0, env->gpr[src]);
+        glue(stl, MEMSUFFIX)(T0, env->gpr[src]);
     }
 }
-#endif
 
 void glue(do_lmw_le, MEMSUFFIX) (int dst)
 {
     for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lmw_le_64, MEMSUFFIX) (int dst)
-{
-    for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst] = glue(ldulr, MEMSUFFIX)(T0);
     }
 }
-#endif
 
 void glue(do_stmw_le, MEMSUFFIX) (int src)
 {
     for (; src < 32; src++, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint32_t)T0, env->gpr[src]);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stmw_le_64, MEMSUFFIX) (int src)
-{
-    for (; src < 32; src++, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint64_t)T0, env->gpr[src]);
+        glue(stlr, MEMSUFFIX)(T0, env->gpr[src]);
     }
 }
-#endif
 
 void glue(do_lsw, MEMSUFFIX) (int dst)
 {
@@ -105,71 +53,33 @@ void glue(do_lsw, MEMSUFFIX) (int dst)
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-        if (unlikely(dst == 32))
-            dst = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        tmp = 0;
-        for (sh = 24; T1 > 0; T1--, T0++, sh -= 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint32_t)T0) << sh;
-        }
-        env->gpr[dst] = tmp;
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lsw_64, MEMSUFFIX) (int dst)
-{
-    uint32_t tmp;
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst++] = glue(ldul, MEMSUFFIX)(T0);
         if (unlikely(dst == 32))
             dst = 0;
     }
     if (unlikely(T1 != 0)) {
         tmp = 0;
         for (sh = 24; T1 > 0; T1--, T0++, sh -= 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint64_t)T0) << sh;
+            tmp |= glue(ldub, MEMSUFFIX)(T0) << sh;
         }
         env->gpr[dst] = tmp;
     }
 }
-#endif
 
 void glue(do_stsw, MEMSUFFIX) (int src)
 {
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint32_t)T0, env->gpr[src++]);
-        if (unlikely(src == 32))
-            src = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        for (sh = 24; T1 > 0; T1--, T0++, sh -= 8)
-            glue(stb, MEMSUFFIX)((uint32_t)T0, (env->gpr[src] >> sh) & 0xFF);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stsw_64, MEMSUFFIX) (int src)
-{
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint64_t)T0, env->gpr[src++]);
+        glue(stl, MEMSUFFIX)(T0, env->gpr[src++]);
         if (unlikely(src == 32))
             src = 0;
     }
     if (unlikely(T1 != 0)) {
         for (sh = 24; T1 > 0; T1--, T0++, sh -= 8)
-            glue(stb, MEMSUFFIX)((uint64_t)T0, (env->gpr[src] >> sh) & 0xFF);
+            glue(stb, MEMSUFFIX)(T0, (env->gpr[src] >> sh) & 0xFF);
     }
 }
-#endif
 
 void glue(do_lsw_le, MEMSUFFIX) (int dst)
 {
@@ -177,71 +87,33 @@ void glue(do_lsw_le, MEMSUFFIX) (int dst
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
-        if (unlikely(dst == 32))
-            dst = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        tmp = 0;
-        for (sh = 0; T1 > 0; T1--, T0++, sh += 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint32_t)T0) << sh;
-        }
-        env->gpr[dst] = tmp;
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lsw_le_64, MEMSUFFIX) (int dst)
-{
-    uint32_t tmp;
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst++] = glue(ldulr, MEMSUFFIX)(T0);
         if (unlikely(dst == 32))
             dst = 0;
     }
     if (unlikely(T1 != 0)) {
         tmp = 0;
         for (sh = 0; T1 > 0; T1--, T0++, sh += 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint64_t)T0) << sh;
+            tmp |= glue(ldub, MEMSUFFIX)(T0) << sh;
         }
         env->gpr[dst] = tmp;
     }
 }
-#endif
 
 void glue(do_stsw_le, MEMSUFFIX) (int src)
 {
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint32_t)T0, env->gpr[src++]);
-        if (unlikely(src == 32))
-            src = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        for (sh = 0; T1 > 0; T1--, T0++, sh += 8)
-            glue(stb, MEMSUFFIX)((uint32_t)T0, (env->gpr[src] >> sh) & 0xFF);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stsw_le_64, MEMSUFFIX) (int src)
-{
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint64_t)T0, env->gpr[src++]);
+        glue(stlr, MEMSUFFIX)(T0, env->gpr[src++]);
         if (unlikely(src == 32))
             src = 0;
     }
     if (unlikely(T1 != 0)) {
         for (sh = 0; T1 > 0; T1--, T0++, sh += 8)
-            glue(stb, MEMSUFFIX)((uint64_t)T0, (env->gpr[src] >> sh) & 0xFF);
+            glue(stb, MEMSUFFIX)(T0, (env->gpr[src] >> sh) & 0xFF);
     }
 }
-#endif
 
 /* Instruction cache invalidation helper */
 void glue(do_icbi, MEMSUFFIX) (void)
@@ -252,27 +124,11 @@ void glue(do_icbi, MEMSUFFIX) (void)
      * (not a fetch) by the MMU. To be sure it will be so,
      * do the load "by hand".
      */
-    tmp = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-    T0 &= ~(env->icache_line_size - 1);
-    tb_invalidate_page_range((uint32_t)T0,
-                             (uint32_t)(T0 + env->icache_line_size));
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_icbi_64, MEMSUFFIX) (void)
-{
-    uint64_t tmp;
-    /* Invalidate one cache line :
-     * PowerPC specification says this is to be treated like a load
-     * (not a fetch) by the MMU. To be sure it will be so,
-     * do the load "by hand".
-     */
-    tmp = glue(ldq, MEMSUFFIX)((uint64_t)T0);
+    tmp = glue(ldul, MEMSUFFIX)(T0);
     T0 &= ~(env->icache_line_size - 1);
-    tb_invalidate_page_range((uint64_t)T0,
-                             (uint64_t)(T0 + env->icache_line_size));
+    /* We assume the range does not wrap around 2^32 on 32-bit targets */
+    tb_invalidate_page_range(T0, T0 + env->icache_line_size);
 }
-#endif
 
 void glue(do_dcbz, MEMSUFFIX) (void)
 {
@@ -281,90 +137,43 @@ void glue(do_dcbz, MEMSUFFIX) (void)
     /* XXX: should be 970 specific (?) */
     if (((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1)
         dcache_line_size = 32;
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x00), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x04), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x08), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x0C), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x10), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x14), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x18), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x1C), 0);
-    if (dcache_line_size >= 64) {
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x20UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x24UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x28UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x2CUL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x30UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x34UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x38UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x3CUL), 0);
-        if (dcache_line_size >= 128) {
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x40UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x44UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x48UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x4CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x50UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x54UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x58UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x5CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x60UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x64UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x68UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x6CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x70UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x74UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x78UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x7CUL), 0);
-        }
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_dcbz_64, MEMSUFFIX) (void)
-{
-    int dcache_line_size = env->dcache_line_size;
-
-    /* XXX: should be 970 specific (?) */
-    if (((env->spr[SPR_970_HID5] >> 6) & 0x3) == 0x2)
-        dcache_line_size = 32;
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x00), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x04), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x08), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x0C), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x10), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x14), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x18), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x1C), 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x00, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x04, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x08, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x0C, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x10, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x14, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x18, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x1C, 0);
     if (dcache_line_size >= 64) {
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x20UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x24UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x28UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x2CUL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x30UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x34UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x38UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x3CUL), 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x20UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x24UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x28UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x2CUL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x30UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x34UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x38UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x3CUL, 0);
         if (dcache_line_size >= 128) {
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x40UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x44UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x48UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x4CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x50UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x54UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x58UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x5CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x60UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x64UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x68UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x6CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x70UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x74UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x78UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x7CUL), 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x40UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x44UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x48UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x4CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x50UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x54UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x58UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x5CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x60UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x64UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x68UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x6CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x70UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x74UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x78UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x7CUL, 0);
         }
     }
 }
-#endif
 
 /* PowerPC 601 specific instructions (POWER bridge) */
 // XXX: to be tested
@@ -400,26 +209,6 @@ void glue(do_POWER2_lfq, MEMSUFFIX) (voi
     FT1 = glue(ldfq, MEMSUFFIX)((uint32_t)(T0 + 4));
 }
 
-static always_inline double glue(ldfqr, MEMSUFFIX) (target_ulong EA)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = glue(ldfq, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-
-    return u.d;
-}
-
 void glue(do_POWER2_lfq_le, MEMSUFFIX) (void)
 {
     FT0 = glue(ldfqr, MEMSUFFIX)((uint32_t)(T0 + 4));
@@ -432,25 +221,6 @@ void glue(do_POWER2_stfq, MEMSUFFIX) (vo
     glue(stfq, MEMSUFFIX)((uint32_t)(T0 + 4), FT1);
 }
 
-static always_inline void glue(stfqr, MEMSUFFIX) (target_ulong EA, double d)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = d;
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-    glue(stfq, MEMSUFFIX)(EA, u.d);
-}
-
 void glue(do_POWER2_stfq_le, MEMSUFFIX) (void)
 {
     glue(stfqr, MEMSUFFIX)((uint32_t)(T0 + 4), FT0);
Index: target-ppc/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/op_mem.h,v
retrieving revision 1.22
diff -u -d -d -p -r1.22 op_mem.h
--- target-ppc/op_mem.h	7 Oct 2007 18:19:25 -0000	1.22
+++ target-ppc/op_mem.h	14 Oct 2007 11:36:55 -0000
@@ -18,85 +18,6 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-static always_inline uint16_t glue(ld16r, MEMSUFFIX) (target_ulong EA)
-{
-    uint16_t tmp = glue(lduw, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF00) >> 8) | ((tmp & 0x00FF) << 8);
-}
-
-static always_inline int32_t glue(ld16rs, MEMSUFFIX) (target_ulong EA)
-{
-    int16_t tmp = glue(lduw, MEMSUFFIX)(EA);
-    return (int16_t)((tmp & 0xFF00) >> 8) | ((tmp & 0x00FF) << 8);
-}
-
-static always_inline uint32_t glue(ld32r, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF000000) >> 24) | ((tmp & 0x00FF0000) >> 8) |
-        ((tmp & 0x0000FF00) << 8) | ((tmp & 0x000000FF) << 24);
-}
-
-#if defined(TARGET_PPC64) || defined(TARGET_PPCEMB)
-static always_inline uint64_t glue(ld64r, MEMSUFFIX) (target_ulong EA)
-{
-    uint64_t tmp = glue(ldq, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF00000000000000ULL) >> 56) |
-        ((tmp & 0x00FF000000000000ULL) >> 40) |
-        ((tmp & 0x0000FF0000000000ULL) >> 24) |
-        ((tmp & 0x000000FF00000000ULL) >> 8) |
-        ((tmp & 0x00000000FF000000ULL) << 8) |
-        ((tmp & 0x0000000000FF0000ULL) << 24) |
-        ((tmp & 0x000000000000FF00ULL) << 40) |
-        ((tmp & 0x00000000000000FFULL) << 54);
-}
-#endif
-
-#if defined(TARGET_PPC64)
-static always_inline int64_t glue(ldsl, MEMSUFFIX) (target_ulong EA)
-{
-    return (int32_t)glue(ldl, MEMSUFFIX)(EA);
-}
-
-static always_inline int64_t glue(ld32rs, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return (int32_t)((tmp & 0xFF000000) >> 24) | ((tmp & 0x00FF0000) >> 8) |
-        ((tmp & 0x0000FF00) << 8) | ((tmp & 0x000000FF) << 24);
-}
-#endif
-
-static always_inline void glue(st16r, MEMSUFFIX) (target_ulong EA,
-                                                  uint16_t data)
-{
-    uint16_t tmp = ((data & 0xFF00) >> 8) | ((data & 0x00FF) << 8);
-    glue(stw, MEMSUFFIX)(EA, tmp);
-}
-
-static always_inline void glue(st32r, MEMSUFFIX) (target_ulong EA,
-                                                  uint32_t data)
-{
-    uint32_t tmp = ((data & 0xFF000000) >> 24) | ((data & 0x00FF0000) >> 8) |
-        ((data & 0x0000FF00) << 8) | ((data & 0x000000FF) << 24);
-    glue(stl, MEMSUFFIX)(EA, tmp);
-}
-
-#if defined(TARGET_PPC64) || defined(TARGET_PPCEMB)
-static always_inline void glue(st64r, MEMSUFFIX) (target_ulong EA,
-                                                  uint64_t data)
-{
-    uint64_t tmp = ((data & 0xFF00000000000000ULL) >> 56) |
-        ((data & 0x00FF000000000000ULL) >> 40) |
-        ((data & 0x0000FF0000000000ULL) >> 24) |
-        ((data & 0x000000FF00000000ULL) >> 8) |
-        ((data & 0x00000000FF000000ULL) << 8) |
-        ((data & 0x0000000000FF0000ULL) << 24) |
-        ((data & 0x000000000000FF00ULL) << 40) |
-        ((data & 0x00000000000000FFULL) << 56);
-    glue(stq, MEMSUFFIX)(EA, tmp);
-}
-#endif
-
 /***                             Integer load                              ***/
 #define PPC_LD_OP(name, op)                                                   \
 void OPPROTO glue(glue(op_l, name), MEMSUFFIX) (void)                         \
@@ -130,10 +51,11 @@ void OPPROTO glue(glue(glue(op_st, name)
 }
 #endif
 
+/* Native-endian fixed-point memory loads                                    */
 PPC_LD_OP(bz, ldub);
 PPC_LD_OP(ha, ldsw);
 PPC_LD_OP(hz, lduw);
-PPC_LD_OP(wz, ldl);
+PPC_LD_OP(wz, ldul);
 #if defined(TARGET_PPC64)
 PPC_LD_OP(d, ldq);
 PPC_LD_OP(wa, ldsl);
@@ -142,23 +64,24 @@ PPC_LD_OP_64(wa, ldsl);
 PPC_LD_OP_64(bz, ldub);
 PPC_LD_OP_64(ha, ldsw);
 PPC_LD_OP_64(hz, lduw);
-PPC_LD_OP_64(wz, ldl);
+PPC_LD_OP_64(wz, ldul);
 #endif
 
-PPC_LD_OP(ha_le, ld16rs);
-PPC_LD_OP(hz_le, ld16r);
-PPC_LD_OP(wz_le, ld32r);
+/* Reverse-endian fixed-point memory loads                                   */
+PPC_LD_OP(ha_le, ldswr);
+PPC_LD_OP(hz_le, lduwr);
+PPC_LD_OP(wz_le, ldulr);
 #if defined(TARGET_PPC64)
-PPC_LD_OP(d_le, ld64r);
-PPC_LD_OP(wa_le, ld32rs);
-PPC_LD_OP_64(d_le, ld64r);
-PPC_LD_OP_64(wa_le, ld32rs);
-PPC_LD_OP_64(ha_le, ld16rs);
-PPC_LD_OP_64(hz_le, ld16r);
-PPC_LD_OP_64(wz_le, ld32r);
+PPC_LD_OP(d_le, ldqr);
+PPC_LD_OP(wa_le, ldslr);
+PPC_LD_OP_64(d_le, ldqr);
+PPC_LD_OP_64(wa_le, ldslr);
+PPC_LD_OP_64(ha_le, ldswr);
+PPC_LD_OP_64(hz_le, lduwr);
+PPC_LD_OP_64(wz_le, ldulr);
 #endif
 
-/***                              Integer store                            ***/
+/* Native-endian fixed-point memory stores                                   */
 PPC_ST_OP(b, stb);
 PPC_ST_OP(h, stw);
 PPC_ST_OP(w, stl);
@@ -170,120 +93,110 @@ PPC_ST_OP_64(h, stw);
 PPC_ST_OP_64(w, stl);
 #endif
 
-PPC_ST_OP(h_le, st16r);
-PPC_ST_OP(w_le, st32r);
+/* Reverse-endian fixed-point memory stores                                  */
+PPC_ST_OP(h_le, stwr);
+PPC_ST_OP(w_le, stlr);
 #if defined(TARGET_PPC64)
-PPC_ST_OP(d_le, st64r);
-PPC_ST_OP_64(d_le, st64r);
-PPC_ST_OP_64(h_le, st16r);
-PPC_ST_OP_64(w_le, st32r);
+PPC_ST_OP(d_le, stqr);
+PPC_ST_OP_64(d_le, stqr);
+PPC_ST_OP_64(h_le, stwr);
+PPC_ST_OP_64(w_le, stlr);
 #endif
 
-/***                Integer load and store with byte reverse               ***/
-PPC_LD_OP(hbr, ld16r);
-PPC_LD_OP(wbr, ld32r);
-PPC_ST_OP(hbr, st16r);
-PPC_ST_OP(wbr, st32r);
+/* Native-endian fixed-point loads and stores with byte-reverse              */
+PPC_LD_OP(hbr, lduwr);
+PPC_LD_OP(wbr, ldulr);
+PPC_ST_OP(hbr, stwr);
+PPC_ST_OP(wbr, stlr);
 #if defined(TARGET_PPC64)
-PPC_LD_OP_64(hbr, ld16r);
-PPC_LD_OP_64(wbr, ld32r);
-PPC_ST_OP_64(hbr, st16r);
-PPC_ST_OP_64(wbr, st32r);
+PPC_LD_OP_64(hbr, lduwr);
+PPC_LD_OP_64(wbr, ldulr);
+PPC_ST_OP_64(hbr, stwr);
+PPC_ST_OP_64(wbr, stlr);
 #endif
 
+/* Reverse-endian fixed-point loads and stores with byte-reverse             */
 PPC_LD_OP(hbr_le, lduw);
-PPC_LD_OP(wbr_le, ldl);
+PPC_LD_OP(wbr_le, ldul);
 PPC_ST_OP(hbr_le, stw);
 PPC_ST_OP(wbr_le, stl);
 #if defined(TARGET_PPC64)
 PPC_LD_OP_64(hbr_le, lduw);
-PPC_LD_OP_64(wbr_le, ldl);
+PPC_LD_OP_64(wbr_le, ldul);
 PPC_ST_OP_64(hbr_le, stw);
 PPC_ST_OP_64(wbr_le, stl);
 #endif
 
-/***                    Integer load and store multiple                    ***/
+/* Native-endian fixed-point loads and stores multiple                       */
 void OPPROTO glue(op_lmw, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_lmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_lmw_64, MEMSUFFIX) (void)
-{
-    glue(do_lmw_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
-void OPPROTO glue(op_lmw_le, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw, MEMSUFFIX) (void)
 {
-    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_stmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_lmw_le_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_lmw_64, MEMSUFFIX) (void)
 {
-    glue(do_lmw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_lmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
-#endif
 
-void OPPROTO glue(op_stmw, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw_64, MEMSUFFIX) (void)
 {
     glue(do_stmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
+#endif
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_stmw_64, MEMSUFFIX) (void)
+/* Reverse-endian fixed-point loads and stores multiple                      */
+void OPPROTO glue(op_lmw_le, MEMSUFFIX) (void)
 {
-    glue(do_stmw_64, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
-#endif
 
 void OPPROTO glue(op_stmw_le, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_stmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_stmw_le_64, MEMSUFFIX) (void)
-{
-    glue(do_stmw_le_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
-/***                    Integer load and store strings                     ***/
-void OPPROTO glue(op_lswi, MEMSUFFIX) (void)
+void OPPROTO glue(op_lmw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw, MEMSUFFIX)(PARAM1);
+    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_lswi_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw_64, MEMSUFFIX)(PARAM1);
+    glue(do_stmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
 
-void OPPROTO glue(op_lswi_le, MEMSUFFIX) (void)
+/* Native-endian loads and stores string (32-bit ops truncate T0 first)      */
+void OPPROTO glue(op_lswi, MEMSUFFIX) (void)
 {
-    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_lsw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_lswi_le_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_lswi_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_lsw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
@@ -303,6 +216,7 @@ void OPPROTO glue(op_lswx, MEMSUFFIX) (v
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
+            T0 = (uint32_t)T0;
             glue(do_lsw, MEMSUFFIX)(PARAM1);
         }
     }
@@ -320,13 +234,44 @@ void OPPROTO glue(op_lswx_64, MEMSUFFIX)
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
-            glue(do_lsw_64, MEMSUFFIX)(PARAM1);
+            glue(do_lsw, MEMSUFFIX)(PARAM1);
         }
     }
     RETURN();
 }
 #endif
 
+void OPPROTO glue(op_stsw, MEMSUFFIX) (void)
+{
+    T0 = (uint32_t)T0;
+    glue(do_stsw, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+
+#if defined(TARGET_PPC64)
+void OPPROTO glue(op_stsw_64, MEMSUFFIX) (void)
+{
+    glue(do_stsw, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+#endif
+
+/* Reverse-endian loads and stores string                                    */
+void OPPROTO glue(op_lswi_le, MEMSUFFIX) (void)
+{
+    T0 = (uint32_t)T0;
+    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+
+#if defined(TARGET_PPC64)
+void OPPROTO glue(op_lswi_le_64, MEMSUFFIX) (void)
+{
+    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+#endif
+
 void OPPROTO glue(op_lswx_le, MEMSUFFIX) (void)
 {
     /* Note: T1 comes from xer_bc then no cast is needed */
@@ -337,6 +282,7 @@ void OPPROTO glue(op_lswx_le, MEMSUFFIX)
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
+            T0 = (uint32_t)T0;
             glue(do_lsw_le, MEMSUFFIX)(PARAM1);
         }
     }
@@ -354,29 +300,16 @@ void OPPROTO glue(op_lswx_le_64, MEMSUFF
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
-            glue(do_lsw_le_64, MEMSUFFIX)(PARAM1);
+            glue(do_lsw_le, MEMSUFFIX)(PARAM1);
         }
     }
     RETURN();
 }
 #endif
 
-void OPPROTO glue(op_stsw, MEMSUFFIX) (void)
-{
-    glue(do_stsw, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_stsw_64, MEMSUFFIX) (void)
-{
-    glue(do_stsw_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
 void OPPROTO glue(op_stsw_le, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_stsw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
@@ -384,7 +317,7 @@ void OPPROTO glue(op_stsw_le, MEMSUFFIX)
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_stsw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_stsw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_stsw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
@@ -432,38 +365,9 @@ PPC_STF_OP_64(fs, stfs);
 PPC_STF_OP_64(fiwx, stfiwx);
 #endif
 
-static always_inline void glue(stfqr, MEMSUFFIX) (target_ulong EA, double d)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = d;
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-    glue(stfq, MEMSUFFIX)(EA, u.d);
-}
-
 static always_inline void glue(stfsr, MEMSUFFIX) (target_ulong EA, double d)
 {
-    union {
-        float f;
-        uint32_t u;
-    } u;
-
-    u.f = float64_to_float32(d, &env->fp_status);
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-    glue(stfl, MEMSUFFIX)(EA, u.f);
+    glue(stflr, MEMSUFFIX)(EA, float64_to_float32(d, &env->fp_status));
 }
 
 static always_inline void glue(stfiwxr, MEMSUFFIX) (target_ulong EA, double d)
@@ -475,11 +379,7 @@ static always_inline void glue(stfiwxr, 
 
     /* Store the low order 32 bits without any conversion */
     u.d = d;
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-    glue(stl, MEMSUFFIX)(EA, u.u);
+    glue(stlr, MEMSUFFIX)(EA, u.u);
 }
 
 PPC_STF_OP(fd_le, stfqr);
@@ -520,40 +420,9 @@ PPC_LDF_OP_64(fd, ldfq);
 PPC_LDF_OP_64(fs, ldfs);
 #endif
 
-static always_inline double glue(ldfqr, MEMSUFFIX) (target_ulong EA)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = glue(ldfq, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-
-    return u.d;
-}
-
 static always_inline double glue(ldfsr, MEMSUFFIX) (target_ulong EA)
 {
-    union {
-        float f;
-        uint32_t u;
-    } u;
-
-    u.f = glue(ldfl, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-
-    return float32_to_float64(u.f, &env->fp_status);
+    return float32_to_float64(glue(ldflr, MEMSUFFIX)(EA), &env->fp_status);
 }
 
 PPC_LDF_OP(fd_le, ldfqr);
@@ -569,7 +438,7 @@ void OPPROTO glue(op_lwarx, MEMSUFFIX) (
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ldl, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldul, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -581,7 +450,7 @@ void OPPROTO glue(op_lwarx_64, MEMSUFFIX
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldul, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -615,7 +484,7 @@ void OPPROTO glue(op_lwarx_le, MEMSUFFIX
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldulr, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -627,7 +496,7 @@ void OPPROTO glue(op_lwarx_le_64, MEMSUF
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldulr, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -638,7 +507,7 @@ void OPPROTO glue(op_ldarx_le, MEMSUFFIX
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld64r, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldqr, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -649,7 +518,7 @@ void OPPROTO glue(op_ldarx_le_64, MEMSUF
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld64r, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldqr, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -731,7 +600,7 @@ void OPPROTO glue(op_stwcx_le, MEMSUFFIX
         if (unlikely(env->reserve != (uint32_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st32r, MEMSUFFIX)((uint32_t)T0, T1);
+            glue(stlr, MEMSUFFIX)((uint32_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -748,7 +617,7 @@ void OPPROTO glue(op_stwcx_le_64, MEMSUF
         if (unlikely(env->reserve != (uint64_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st32r, MEMSUFFIX)((uint64_t)T0, T1);
+            glue(stlr, MEMSUFFIX)((uint64_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -764,7 +633,7 @@ void OPPROTO glue(op_stdcx_le, MEMSUFFIX
         if (unlikely(env->reserve != (uint32_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st64r, MEMSUFFIX)((uint32_t)T0, T1);
+            glue(stqr, MEMSUFFIX)((uint32_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -780,7 +649,7 @@ void OPPROTO glue(op_stdcx_le_64, MEMSUF
         if (unlikely(env->reserve != (uint64_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st64r, MEMSUFFIX)((uint64_t)T0, T1);
+            glue(stqr, MEMSUFFIX)((uint64_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -862,6 +731,7 @@ void OPPROTO glue(op_dcbz_l128, MEMSUFFI
 
 void OPPROTO glue(op_dcbz, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_dcbz, MEMSUFFIX)();
     RETURN();
 }
@@ -940,7 +810,7 @@ void OPPROTO glue(op_dcbz_l128_64, MEMSU
 
 void OPPROTO glue(op_dcbz_64, MEMSUFFIX) (void)
 {
-    glue(do_dcbz_64, MEMSUFFIX)();
+    glue(do_dcbz, MEMSUFFIX)();
     RETURN();
 }
 #endif
@@ -948,6 +818,7 @@ void OPPROTO glue(op_dcbz_64, MEMSUFFIX)
 /* Instruction cache block invalidate */
 void OPPROTO glue(op_icbi, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_icbi, MEMSUFFIX)();
     RETURN();
 }
@@ -955,7 +826,7 @@ void OPPROTO glue(op_icbi, MEMSUFFIX) (v
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_icbi_64, MEMSUFFIX) (void)
 {
-    glue(do_icbi_64, MEMSUFFIX)();
+    glue(do_icbi, MEMSUFFIX)();
     RETURN();
 }
 #endif
@@ -963,14 +834,14 @@ void OPPROTO glue(op_icbi_64, MEMSUFFIX)
 /* External access */
 void OPPROTO glue(op_eciwx, MEMSUFFIX) (void)
 {
-    T1 = glue(ldl, MEMSUFFIX)((uint32_t)T0);
+    T1 = glue(ldul, MEMSUFFIX)((uint32_t)T0);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_eciwx_64, MEMSUFFIX) (void)
 {
-    T1 = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+    T1 = glue(ldul, MEMSUFFIX)((uint64_t)T0);
     RETURN();
 }
 #endif
@@ -991,28 +862,28 @@ void OPPROTO glue(op_ecowx_64, MEMSUFFIX
 
 void OPPROTO glue(op_eciwx_le, MEMSUFFIX) (void)
 {
-    T1 = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
+    T1 = glue(ldulr, MEMSUFFIX)((uint32_t)T0);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_eciwx_le_64, MEMSUFFIX) (void)
 {
-    T1 = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+    T1 = glue(ldulr, MEMSUFFIX)((uint64_t)T0);
     RETURN();
 }
 #endif
 
 void OPPROTO glue(op_ecowx_le, MEMSUFFIX) (void)
 {
-    glue(st32r, MEMSUFFIX)((uint32_t)T0, T1);
+    glue(stlr, MEMSUFFIX)((uint32_t)T0, T1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_ecowx_le_64, MEMSUFFIX) (void)
 {
-    glue(st32r, MEMSUFFIX)((uint64_t)T0, T1);
+    glue(stlr, MEMSUFFIX)((uint64_t)T0, T1);
     RETURN();
 }
 #endif
@@ -1070,8 +941,8 @@ void OPPROTO glue(op_vr_lvx, MEMSUFFIX) 
 
 void OPPROTO glue(op_vr_lvx_le, MEMSUFFIX) (void)
 {
-    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint32_t)T0);
-    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint32_t)T0 + 8);
+    AVR0.u64[VR_DWORD1] = glue(ldqr, MEMSUFFIX)((uint32_t)T0);
+    AVR0.u64[VR_DWORD0] = glue(ldqr, MEMSUFFIX)((uint32_t)T0 + 8);
 }
 
 void OPPROTO glue(op_vr_stvx, MEMSUFFIX) (void)
@@ -1082,8 +953,8 @@ void OPPROTO glue(op_vr_stvx, MEMSUFFIX)
 
 void OPPROTO glue(op_vr_stvx_le, MEMSUFFIX) (void)
 {
-    glue(stq, MEMSUFFIX)((uint32_t)T0, AVR0.u64[VR_DWORD1]);
-    glue(stq, MEMSUFFIX)((uint32_t)T0 + 8, AVR0.u64[VR_DWORD0]);
+    glue(stqr, MEMSUFFIX)((uint32_t)T0, AVR0.u64[VR_DWORD1]);
+    glue(stqr, MEMSUFFIX)((uint32_t)T0 + 8, AVR0.u64[VR_DWORD0]);
 }
 
 #if defined(TARGET_PPC64)
@@ -1095,8 +966,8 @@ void OPPROTO glue(op_vr_lvx_64, MEMSUFFI
 
 void OPPROTO glue(op_vr_lvx_le_64, MEMSUFFIX) (void)
 {
-    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint64_t)T0);
-    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint64_t)T0 + 8);
+    AVR0.u64[VR_DWORD1] = glue(ldqr, MEMSUFFIX)((uint64_t)T0);
+    AVR0.u64[VR_DWORD0] = glue(ldqr, MEMSUFFIX)((uint64_t)T0 + 8);
 }
 
 void OPPROTO glue(op_vr_stvx_64, MEMSUFFIX) (void)
@@ -1107,8 +978,8 @@ void OPPROTO glue(op_vr_stvx_64, MEMSUFF
 
 void OPPROTO glue(op_vr_stvx_le_64, MEMSUFFIX) (void)
 {
-    glue(stq, MEMSUFFIX)((uint64_t)T0, AVR0.u64[VR_DWORD1]);
-    glue(stq, MEMSUFFIX)((uint64_t)T0 + 8, AVR0.u64[VR_DWORD0]);
+    glue(stqr, MEMSUFFIX)((uint64_t)T0, AVR0.u64[VR_DWORD1]);
+    glue(stqr, MEMSUFFIX)((uint64_t)T0 + 8, AVR0.u64[VR_DWORD0]);
 }
 #endif
 #undef VR_DWORD0
@@ -1163,14 +1034,14 @@ _PPC_SPE_ST_OP(name, op)
 #if !defined(TARGET_PPC64)
 PPC_SPE_LD_OP(dd, ldq);
 PPC_SPE_ST_OP(dd, stq);
-PPC_SPE_LD_OP(dd_le, ld64r);
-PPC_SPE_ST_OP(dd_le, st64r);
+PPC_SPE_LD_OP(dd_le, ldqr);
+PPC_SPE_ST_OP(dd_le, stqr);
 #endif
 static always_inline uint64_t glue(spe_ldw, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ldl, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ldl, MEMSUFFIX)(EA + 4);
+    ret = (uint64_t)glue(ldul, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(ldul, MEMSUFFIX)(EA + 4);
     return ret;
 }
 PPC_SPE_LD_OP(dw, spe_ldw);
@@ -1184,16 +1055,16 @@ PPC_SPE_ST_OP(dw, spe_stdw);
 static always_inline uint64_t glue(spe_ldw_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld32r, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ld32r, MEMSUFFIX)(EA + 4);
+    ret = (uint64_t)glue(ldulr, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(ldulr, MEMSUFFIX)(EA + 4);
     return ret;
 }
 PPC_SPE_LD_OP(dw_le, spe_ldw_le);
 static always_inline void glue(spe_stdw_le, MEMSUFFIX) (target_ulong EA,
                                                         uint64_t data)
 {
-    glue(st32r, MEMSUFFIX)(EA, data >> 32);
-    glue(st32r, MEMSUFFIX)(EA + 4, data);
+    glue(stlr, MEMSUFFIX)(EA, data >> 32);
+    glue(stlr, MEMSUFFIX)(EA + 4, data);
 }
 PPC_SPE_ST_OP(dw_le, spe_stdw_le);
 static always_inline uint64_t glue(spe_ldh, MEMSUFFIX) (target_ulong EA)
@@ -1218,20 +1089,20 @@ PPC_SPE_ST_OP(dh, spe_stdh);
 static always_inline uint64_t glue(spe_ldh_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 48;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2) << 32;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 4) << 16;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 6);
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 48;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2) << 32;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 4) << 16;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 6);
     return ret;
 }
 PPC_SPE_LD_OP(dh_le, spe_ldh_le);
 static always_inline void glue(spe_stdh_le, MEMSUFFIX) (target_ulong EA,
                                                         uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 48);
-    glue(st16r, MEMSUFFIX)(EA + 2, data >> 32);
-    glue(st16r, MEMSUFFIX)(EA + 4, data >> 16);
-    glue(st16r, MEMSUFFIX)(EA + 6, data);
+    glue(stwr, MEMSUFFIX)(EA, data >> 48);
+    glue(stwr, MEMSUFFIX)(EA + 2, data >> 32);
+    glue(stwr, MEMSUFFIX)(EA + 4, data >> 16);
+    glue(stwr, MEMSUFFIX)(EA + 6, data);
 }
 PPC_SPE_ST_OP(dh_le, spe_stdh_le);
 static always_inline uint64_t glue(spe_lwhe, MEMSUFFIX) (target_ulong EA)
@@ -1252,16 +1123,16 @@ PPC_SPE_ST_OP(whe, spe_stwhe);
 static always_inline uint64_t glue(spe_lwhe_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 48;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2) << 16;
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 48;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2) << 16;
     return ret;
 }
 PPC_SPE_LD_OP(whe_le, spe_lwhe_le);
 static always_inline void glue(spe_stwhe_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 48);
-    glue(st16r, MEMSUFFIX)(EA + 2, data >> 16);
+    glue(stwr, MEMSUFFIX)(EA, data >> 48);
+    glue(stwr, MEMSUFFIX)(EA + 2, data >> 16);
 }
 PPC_SPE_ST_OP(whe_le, spe_stwhe_le);
 static always_inline uint64_t glue(spe_lwhou, MEMSUFFIX) (target_ulong EA)
@@ -1290,24 +1161,24 @@ PPC_SPE_ST_OP(who, spe_stwho);
 static always_inline uint64_t glue(spe_lwhou_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2);
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2);
     return ret;
 }
 PPC_SPE_LD_OP(whou_le, spe_lwhou_le);
 static always_inline uint64_t glue(spe_lwhos_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = ((uint64_t)((int32_t)glue(ld16rs, MEMSUFFIX)(EA))) << 32;
-    ret |= (uint64_t)((int32_t)glue(ld16rs, MEMSUFFIX)(EA + 2));
+    ret = ((uint64_t)((int32_t)glue(ldswr, MEMSUFFIX)(EA))) << 32;
+    ret |= (uint64_t)((int32_t)glue(ldswr, MEMSUFFIX)(EA + 2));
     return ret;
 }
 PPC_SPE_LD_OP(whos_le, spe_lwhos_le);
 static always_inline void glue(spe_stwho_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 32);
-    glue(st16r, MEMSUFFIX)(EA + 2, data);
+    glue(stwr, MEMSUFFIX)(EA, data >> 32);
+    glue(stwr, MEMSUFFIX)(EA + 2, data);
 }
 PPC_SPE_ST_OP(who_le, spe_stwho_le);
 #if !defined(TARGET_PPC64)
@@ -1320,7 +1191,7 @@ PPC_SPE_ST_OP(wwo, spe_stwwo);
 static always_inline void glue(spe_stwwo_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st32r, MEMSUFFIX)(EA, data);
+    glue(stlr, MEMSUFFIX)(EA, data);
 }
 PPC_SPE_ST_OP(wwo_le, spe_stwwo_le);
 #endif
@@ -1334,14 +1205,14 @@ PPC_SPE_LD_OP(h, spe_lh);
 static always_inline uint64_t glue(spe_lh_le, MEMSUFFIX) (target_ulong EA)
 {
     uint16_t tmp;
-    tmp = glue(ld16r, MEMSUFFIX)(EA);
+    tmp = glue(lduwr, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 48) | ((uint64_t)tmp << 16);
 }
 PPC_SPE_LD_OP(h_le, spe_lh_le);
 static always_inline uint64_t glue(spe_lwwsplat, MEMSUFFIX) (target_ulong EA)
 {
     uint32_t tmp;
-    tmp = glue(ldl, MEMSUFFIX)(EA);
+    tmp = glue(ldul, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 32) | (uint64_t)tmp;
 }
 PPC_SPE_LD_OP(wwsplat, spe_lwwsplat);
@@ -1349,7 +1220,7 @@ static always_inline
 uint64_t glue(spe_lwwsplat_le, MEMSUFFIX) (target_ulong EA)
 {
     uint32_t tmp;
-    tmp = glue(ld32r, MEMSUFFIX)(EA);
+    tmp = glue(ldulr, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 32) | (uint64_t)tmp;
 }
 PPC_SPE_LD_OP(wwsplat_le, spe_lwwsplat_le);
@@ -1369,9 +1240,9 @@ uint64_t glue(spe_lwhsplat_le, MEMSUFFIX
 {
     uint64_t ret;
     uint16_t tmp;
-    tmp = glue(ld16r, MEMSUFFIX)(EA);
+    tmp = glue(lduwr, MEMSUFFIX)(EA);
     ret = ((uint64_t)tmp << 48) | ((uint64_t)tmp << 32);
-    tmp = glue(ld16r, MEMSUFFIX)(EA + 2);
+    tmp = glue(lduwr, MEMSUFFIX)(EA + 2);
     ret |= ((uint64_t)tmp << 16) | (uint64_t)tmp;
     return ret;
 }
Index: target-ppc/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-ppc/translate.c,v
retrieving revision 1.93
diff -u -d -d -p -r1.93 translate.c
--- target-ppc/translate.c	14 Oct 2007 07:07:07 -0000	1.93
+++ target-ppc/translate.c	14 Oct 2007 11:36:56 -0000
@@ -6756,7 +6756,7 @@ static always_inline int gen_intermediat
                     ctx.nip, 1 - msr_pr, msr_ir);
         }
 #endif
-        ctx.opcode = ldl_code(ctx.nip);
+        ctx.opcode = ldul_code(ctx.nip);
         if (msr_le) {
             ctx.opcode = ((ctx.opcode & 0xFF000000) >> 24) |
                 ((ctx.opcode & 0x00FF0000) >> 8) |
Index: target-sh4/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-sh4/exec.h,v
retrieving revision 1.6
diff -u -d -d -p -r1.6 exec.h
--- target-sh4/exec.h	14 Oct 2007 07:07:08 -0000	1.6
+++ target-sh4/exec.h	14 Oct 2007 11:36:56 -0000
@@ -48,6 +48,9 @@ static inline int cpu_halted(CPUState *e
 
 #ifndef CONFIG_USER_ONLY
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 #define RETURN() __asm__ __volatile__("")
Index: target-sh4/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-sh4/op_helper.c,v
retrieving revision 1.5
diff -u -d -d -p -r1.5 op_helper.c
--- target-sh4/op_helper.c	14 Oct 2007 07:07:08 -0000	1.5
+++ target-sh4/op_helper.c	14 Oct 2007 11:36:56 -0000
@@ -30,6 +30,21 @@ void do_raise_exception(void)
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -41,6 +56,7 @@ void do_raise_exception(void)
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 void tlb_fill(target_ulong addr, int is_write, int mmu_idx, void *retaddr)
 {
Index: target-sh4/op_mem.c
===================================================================
RCS file: /sources/qemu/qemu/target-sh4/op_mem.c,v
retrieving revision 1.3
diff -u -d -d -p -r1.3 op_mem.c
--- target-sh4/op_mem.c	16 Sep 2007 21:08:05 -0000	1.3
+++ target-sh4/op_mem.c	14 Oct 2007 11:36:56 -0000
@@ -48,7 +48,7 @@ void glue(op_stw_T0_T1, MEMSUFFIX) (void
 }
 
 void glue(op_ldl_T0_T0, MEMSUFFIX) (void) {
-    T0 = glue(ldl, MEMSUFFIX) (T0);
+    T0 = glue(ldul, MEMSUFFIX) (T0);
     RETURN();
 }
 
Index: target-sparc/exec.h
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/exec.h,v
retrieving revision 1.22
diff -u -d -d -p -r1.22 exec.h
--- target-sparc/exec.h	14 Oct 2007 07:07:08 -0000	1.22
+++ target-sparc/exec.h	14 Oct 2007 11:36:56 -0000
@@ -100,6 +100,9 @@ void do_rdpsr();
 /* XXX: move that to a generic header */
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 static inline void env_to_regs(void)
Index: target-sparc/helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/helper.c,v
retrieving revision 1.28
diff -u -d -d -p -r1.28 helper.c
--- target-sparc/helper.c	14 Oct 2007 07:07:08 -0000	1.28
+++ target-sparc/helper.c	14 Oct 2007 11:36:56 -0000
@@ -130,7 +130,7 @@ int get_physical_address (CPUState *env,
     /* SPARC reference MMU table walk: Context table->L1->L2->PTE */
     /* Context base + context number */
     pde_ptr = (env->mmuregs[1] << 4) + (env->mmuregs[2] << 2);
-    pde = ldl_phys(pde_ptr);
+    pde = ldul_phys(pde_ptr);
 
     /* Ctx pde */
     switch (pde & PTE_ENTRYTYPE_MASK) {
@@ -142,7 +142,7 @@ int get_physical_address (CPUState *env,
         return 4 << 2;
     case 1: /* L0 PDE */
         pde_ptr = ((address >> 22) & ~3) + ((pde & ~3) << 4);
-        pde = ldl_phys(pde_ptr);
+        pde = ldul_phys(pde_ptr);
 
         switch (pde & PTE_ENTRYTYPE_MASK) {
         default:
@@ -152,7 +152,7 @@ int get_physical_address (CPUState *env,
             return (1 << 8) | (4 << 2);
         case 1: /* L1 PDE */
             pde_ptr = ((address & 0xfc0000) >> 16) + ((pde & ~3) << 4);
-            pde = ldl_phys(pde_ptr);
+            pde = ldul_phys(pde_ptr);
 
             switch (pde & PTE_ENTRYTYPE_MASK) {
             default:
@@ -162,7 +162,7 @@ int get_physical_address (CPUState *env,
                 return (2 << 8) | (4 << 2);
             case 1: /* L2 PDE */
                 pde_ptr = ((address & 0x3f000) >> 10) + ((pde & ~3) << 4);
-                pde = ldl_phys(pde_ptr);
+                pde = ldul_phys(pde_ptr);
 
                 switch (pde & PTE_ENTRYTYPE_MASK) {
                 default:
@@ -266,7 +266,7 @@ target_ulong mmu_probe(CPUState *env, ta
     /* Context base + context number */
     pde_ptr = (target_phys_addr_t)(env->mmuregs[1] << 4) +
         (env->mmuregs[2] << 2);
-    pde = ldl_phys(pde_ptr);
+    pde = ldul_phys(pde_ptr);
 
     switch (pde & PTE_ENTRYTYPE_MASK) {
     default:
@@ -278,7 +278,7 @@ target_ulong mmu_probe(CPUState *env, ta
         if (mmulev == 3)
             return pde;
         pde_ptr = ((address >> 22) & ~3) + ((pde & ~3) << 4);
-        pde = ldl_phys(pde_ptr);
+        pde = ldul_phys(pde_ptr);
 
         switch (pde & PTE_ENTRYTYPE_MASK) {
         default:
@@ -291,7 +291,7 @@ target_ulong mmu_probe(CPUState *env, ta
             if (mmulev == 2)
                 return pde;
             pde_ptr = ((address & 0xfc0000) >> 16) + ((pde & ~3) << 4);
-            pde = ldl_phys(pde_ptr);
+            pde = ldul_phys(pde_ptr);
 
             switch (pde & PTE_ENTRYTYPE_MASK) {
             default:
@@ -304,7 +304,7 @@ target_ulong mmu_probe(CPUState *env, ta
                 if (mmulev == 1)
                     return pde;
                 pde_ptr = ((address & 0x3f000) >> 10) + ((pde & ~3) << 4);
-                pde = ldl_phys(pde_ptr);
+                pde = ldul_phys(pde_ptr);
 
                 switch (pde & PTE_ENTRYTYPE_MASK) {
                 default:
@@ -331,7 +331,7 @@ void dump_mmu(CPUState *env)
 
     printf("MMU dump:\n");
     pde_ptr = (env->mmuregs[1] << 4) + (env->mmuregs[2] << 2);
-    pde = ldl_phys(pde_ptr);
+    pde = ldul_phys(pde_ptr);
     printf("Root ptr: " TARGET_FMT_plx ", ctx: %d\n",
            (target_phys_addr_t)env->mmuregs[1] << 4, env->mmuregs[2]);
     for (n = 0, va = 0; n < 256; n++, va += 16 * 1024 * 1024) {
Index: target-sparc/op_helper.c
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/op_helper.c,v
retrieving revision 1.42
diff -u -d -d -p -r1.42 op_helper.c
--- target-sparc/op_helper.c	14 Oct 2007 07:07:08 -0000	1.42
+++ target-sparc/op_helper.c	14 Oct 2007 11:36:56 -0000
@@ -184,11 +184,11 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_code(T0 & ~3);
+            ret = ldul_code(T0 & ~3);
             break;
         case 8:
-            ret = ldl_code(T0 & ~3);
-            T0 = ldl_code((T0 + 4) & ~3);
+            ret = ldul_code(T0 & ~3);
+            T0 = ldul_code((T0 + 4) & ~3);
             break;
         }
         break;
@@ -202,11 +202,11 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_user(T0 & ~3);
+            ret = ldul_user(T0 & ~3);
             break;
         case 8:
-            ret = ldl_user(T0 & ~3);
-            T0 = ldl_user((T0 + 4) & ~3);
+            ret = ldul_user(T0 & ~3);
+            T0 = ldul_user((T0 + 4) & ~3);
             break;
         }
         break;
@@ -220,11 +220,11 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_kernel(T0 & ~3);
+            ret = ldul_kernel(T0 & ~3);
             break;
         case 8:
-            ret = ldl_kernel(T0 & ~3);
-            T0 = ldl_kernel((T0 + 4) & ~3);
+            ret = ldul_kernel(T0 & ~3);
+            T0 = ldul_kernel((T0 + 4) & ~3);
             break;
         }
         break;
@@ -243,11 +243,11 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_phys(T0 & ~3);
+            ret = ldul_phys(T0 & ~3);
             break;
         case 8:
-            ret = ldl_phys(T0 & ~3);
-            T0 = ldl_phys((T0 + 4) & ~3);
+            ret = ldul_phys(T0 & ~3);
+            T0 = ldul_phys((T0 + 4) & ~3);
             break;
         }
         break;
@@ -264,13 +264,13 @@ void helper_ld_asi(int asi, int size, in
             break;
         default:
         case 4:
-            ret = ldl_phys((target_phys_addr_t)(T0 & ~3)
-                           | ((target_phys_addr_t)(asi & 0xf) << 32));
+            ret = ldul_phys((target_phys_addr_t)(T0 & ~3)
+                            | ((target_phys_addr_t)(asi & 0xf) << 32));
             break;
         case 8:
-            ret = ldl_phys((target_phys_addr_t)(T0 & ~3)
-                           | ((target_phys_addr_t)(asi & 0xf) << 32));
-            T0 = ldl_phys((target_phys_addr_t)((T0 + 4) & ~3)
+            ret = ldul_phys((target_phys_addr_t)(T0 & ~3)
+                            | ((target_phys_addr_t)(asi & 0xf) << 32));
+            T0 = ldul_phys((target_phys_addr_t)((T0 + 4) & ~3)
                            | ((target_phys_addr_t)(asi & 0xf) << 32));
             break;
         }
@@ -422,7 +422,7 @@ void helper_st_asi(int asi, int size)
             uint32_t src = T1 & ~3, dst = T0 & ~3, temp;
 
             for (i = 0; i < 32; i += 4, src += 4, dst += 4) {
-                temp = ldl_kernel(src);
+                temp = ldul_kernel(src);
                 stl_kernel(dst, temp);
             }
         }
@@ -525,7 +525,7 @@ void helper_ld_asi(int asi, int size, in
                 ret = lduw_raw(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_raw(T0 & ~3);
+                ret = ldul_raw(T0 & ~3);
                 break;
             default:
             case 8:
@@ -673,7 +673,7 @@ void helper_ld_asi(int asi, int size, in
                 ret = lduw_kernel(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_kernel(T0 & ~3);
+                ret = ldul_kernel(T0 & ~3);
                 break;
             default:
             case 8:
@@ -689,7 +689,7 @@ void helper_ld_asi(int asi, int size, in
                 ret = lduw_user(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_user(T0 & ~3);
+                ret = ldul_user(T0 & ~3);
                 break;
             default:
             case 8:
@@ -711,7 +711,7 @@ void helper_ld_asi(int asi, int size, in
                 ret = lduw_phys(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_phys(T0 & ~3);
+                ret = ldul_phys(T0 & ~3);
                 break;
             default:
             case 8:
@@ -1497,6 +1497,21 @@ static void do_unaligned_access(target_u
 #define ALIGNED_ONLY
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -1508,6 +1523,7 @@ static void do_unaligned_access(target_u
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 static void do_unaligned_access(target_ulong addr, int is_write, int is_user,
                                 void *retaddr)
Index: target-sparc/op_mem.h
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/op_mem.h,v
retrieving revision 1.10
diff -u -d -d -p -r1.10 op_mem.h
--- target-sparc/op_mem.h	21 Sep 2007 19:10:53 -0000	1.10
+++ target-sparc/op_mem.h	14 Oct 2007 11:36:56 -0000
@@ -17,7 +17,7 @@ void OPPROTO glue(glue(op_, name), MEMSU
     glue(op, MEMSUFFIX)(T0, T1);                                      \
 }
 
-SPARC_LD_OP(ld, ldl);
+SPARC_LD_OP(ld, ldul);
 SPARC_LD_OP(ldub, ldub);
 SPARC_LD_OP(lduh, lduw);
 SPARC_LD_OP_S(ldsb, ldsb);
@@ -42,15 +42,15 @@ void OPPROTO glue(op_ldstub, MEMSUFFIX)(
 
 void OPPROTO glue(op_swap, MEMSUFFIX)(void)
 {
-    target_ulong tmp = glue(ldl, MEMSUFFIX)(T0);
+    target_ulong tmp = glue(ldul, MEMSUFFIX)(T0);
     glue(stl, MEMSUFFIX)(T0, T1);       /* XXX: Should be Atomically */
     T1 = tmp;
 }
 
 void OPPROTO glue(op_ldd, MEMSUFFIX)(void)
 {
-    T1 = glue(ldl, MEMSUFFIX)(T0);
-    T0 = glue(ldl, MEMSUFFIX)((T0 + 4));
+    T1 = glue(ldul, MEMSUFFIX)(T0);
+    T0 = glue(ldul, MEMSUFFIX)((T0 + 4));
 }
 
 /***                         Floating-point store                          ***/
@@ -78,12 +78,12 @@ void OPPROTO glue(op_lddf, MEMSUFFIX) (v
 #ifdef TARGET_SPARC64
 void OPPROTO glue(op_lduw, MEMSUFFIX)(void)
 {
-    T1 = (uint64_t)(glue(ldl, MEMSUFFIX)(T0) & 0xffffffff);
+    T1 = (uint64_t)(glue(ldul, MEMSUFFIX)(T0) & 0xffffffff);
 }
 
 void OPPROTO glue(op_ldsw, MEMSUFFIX)(void)
 {
-    T1 = (int64_t)(glue(ldl, MEMSUFFIX)(T0) & 0xffffffff);
+    T1 = (int64_t)(glue(ldul, MEMSUFFIX)(T0) & 0xffffffff);
 }
 
 SPARC_LD_OP(ldx, ldq);
Index: target-sparc/translate.c
===================================================================
RCS file: /sources/qemu/qemu/target-sparc/translate.c,v
retrieving revision 1.75
diff -u -d -d -p -r1.75 translate.c
--- target-sparc/translate.c	14 Oct 2007 07:07:08 -0000	1.75
+++ target-sparc/translate.c	14 Oct 2007 11:36:56 -0000
@@ -1089,7 +1089,7 @@ static void disas_sparc_insn(DisasContex
 {
     unsigned int insn, opc, rs1, rs2, rd;
 
-    insn = ldl_code(dc->pc);
+    insn = ldul_code(dc->pc);
     opc = GET_FIELD(insn, 0, 1);
 
     rd = GET_FIELD(insn, 2, 6);

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2007-11-23 12:52 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-10-13  9:56 [Qemu-devel] RFC: reverse-endian softmmu memory accessors J. Mayer
2007-10-13 10:47 ` Blue Swirl
2007-10-13 12:43   ` J. Mayer
2007-10-13 13:07     ` Blue Swirl
2007-10-13 14:17       ` J. Mayer
2007-10-13 22:07         ` J. Mayer
2007-10-13 22:53           ` Thiemo Seufer
2007-10-14  8:19           ` Blue Swirl
2007-10-14 10:14             ` J. Mayer
2007-10-14 13:22               ` Thiemo Seufer
2007-10-15 11:55                 ` J. Mayer
2007-10-13 13:02   ` Thiemo Seufer
2007-10-14 11:49 J. Mayer
2007-10-14 12:59 ` Blue Swirl
2007-10-15 12:10   ` J. Mayer
2007-10-15 16:02     ` Blue Swirl
2007-10-15 17:45       ` Blue Swirl
2007-10-16 20:27         ` J. Mayer
2007-11-23 12:55           ` Tero Kaarlela
2007-10-15 21:06       ` J. Mayer

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.