All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Blue Swirl" <blauwirbel@gmail.com>
To: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] RFC: reverse-endian softmmu memory accessors
Date: Mon, 15 Oct 2007 19:02:15 +0300	[thread overview]
Message-ID: <f43fc5580710150902l39848603q95b36c9f734295f1@mail.gmail.com> (raw)
In-Reply-To: <1192450234.9976.413.camel@rapid>

[-- Attachment #1: Type: text/plain, Size: 1232 bytes --]

On 10/15/07, J. Mayer <l_indien@magic.fr> wrote:
> On Sun, 2007-10-14 at 15:59 +0300, Blue Swirl wrote:
> > On 10/14/07, J. Mayer <l_indien@magic.fr> wrote:
> > > Here's an updated version of the patch against current CVS.
> > > This patch provides reverse-endian, little-endian and big-endian
> > > memory accessors, available with and without softmmu. It also provides
> > > an IO_MEM_REVERSE TLB flag to allow future support of per-page
> > > endianness control, which is required by some target CPU emulations.
> > > Having reverse-endian memory accessors also makes it possible to optimise
> > > reverse-endian memory access when the target CPU has dedicated
> > > instructions. For now, it includes optimisations for the PowerPC target.
> >
> > This breaks Sparc32 softmmu, I get a black screen. Your changes to
> > target-sparc and hw/sun4m.c look fine, so the problem could be in IO?
>
> Did it work before my commits? I may have done something wrong during
> the merge...
> I will do more checks and more tests...

If I disable the IOSWAP code, the black screen is gone. I think this is
logical: the io accessors return host CPU values, therefore no byte
swapping needs to be performed.

The attached version works for me.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: softmmu_reverse_endian.diff --]
[-- Type: text/x-diff; name="softmmu_reverse_endian.diff", Size: 167738 bytes --]

Index: cpu-all.h
===================================================================
--- cpu-all.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ cpu-all.h	2007-10-15 15:41:09.000000000 +0000
@@ -161,9 +161,9 @@
  *
  * endian is:
  * (empty): target cpu endianness or 8 bit access
- *   r    : reversed target cpu endianness (not implemented yet)
- *   be   : big endian (not implemented yet)
- *   le   : little endian (not implemented yet)
+ *   r    : reversed target cpu endianness
+ *   be   : big endian
+ *   le   : little endian
  *
  * access_type is:
  *   raw    : host memory access
@@ -215,7 +215,32 @@
 #endif
 }
 
-static inline int ldl_le_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_le_p(void *ptr)
+{
+#ifdef __powerpc__
+    int val;
+    __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+    return (uint32_t)val;
+#else
+    uint8_t *p = ptr;
+    return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
+#endif
+}
+
+static inline int64_t ldsl_le_p(void *ptr)
+{
+#ifdef __powerpc__
+    int val;
+    __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+    return (int32_t)val;
+#else
+    uint8_t *p = ptr;
+    return (int32_t)(p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24));
+#endif
+}
+#else
+static inline int ldul_le_p(void *ptr)
 {
 #ifdef __powerpc__
     int val;
@@ -226,13 +251,14 @@
     return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
 #endif
 }
+#endif
 
 static inline uint64_t ldq_le_p(void *ptr)
 {
     uint8_t *p = ptr;
     uint32_t v1, v2;
-    v1 = ldl_le_p(p);
-    v2 = ldl_le_p(p + 4);
+    v1 = ldul_le_p(p);
+    v2 = ldul_le_p(p + 4);
     return v1 | ((uint64_t)v2 << 32);
 }
 
@@ -275,7 +301,7 @@
         float32 f;
         uint32_t i;
     } u;
-    u.i = ldl_le_p(ptr);
+    u.i = ldul_le_p(ptr);
     return u.f;
 }
 
@@ -292,8 +318,8 @@
 static inline float64 ldfq_le_p(void *ptr)
 {
     CPU_DoubleU u;
-    u.l.lower = ldl_le_p(ptr);
-    u.l.upper = ldl_le_p(ptr + 4);
+    u.l.lower = ldul_le_p(ptr);
+    u.l.upper = ldul_le_p(ptr + 4);
     return u.d;
 }
 
@@ -317,10 +343,22 @@
     return *(int16_t *)ptr;
 }
 
-static inline int ldl_le_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_le_p(void *ptr)
+{
+    return *(uint32_t *)ptr;
+}
+
+static inline int64_t ldsl_le_p(void *ptr)
+{
+    return *(int32_t *)ptr;
+}
+#else
+static inline int ldul_le_p(void *ptr)
 {
     return *(uint32_t *)ptr;
 }
+#endif
 
 static inline uint64_t ldq_le_p(void *ptr)
 {
@@ -397,7 +435,38 @@
 #endif
 }
 
-static inline int ldl_be_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_be_p(void *ptr)
+{
+#if defined(__i386__) || defined(__x86_64__)
+    int val;
+    asm volatile ("movl %1, %0\n"
+                  "bswap %0\n"
+                  : "=r" (val)
+                  : "m" (*(uint32_t *)ptr));
+    return (uint32_t)val;
+#else
+    uint8_t *b = (uint8_t *) ptr;
+    return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
+#endif
+}
+
+static inline int64_t ldsl_be_p(void *ptr)
+{
+#if defined(__i386__) || defined(__x86_64__)
+    int val;
+    asm volatile ("movl %1, %0\n"
+                  "bswap %0\n"
+                  : "=r" (val)
+                  : "m" (*(uint32_t *)ptr));
+    return (int32_t)val;
+#else
+    uint8_t *b = (uint8_t *) ptr;
+    return (int32_t)((b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]);
+#endif
+}
+#else
+static inline int ldul_be_p(void *ptr)
 {
 #if defined(__i386__) || defined(__x86_64__)
     int val;
@@ -411,12 +480,13 @@
     return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
 #endif
 }
+#endif
 
 static inline uint64_t ldq_be_p(void *ptr)
 {
     uint32_t a,b;
-    a = ldl_be_p(ptr);
-    b = ldl_be_p(ptr+4);
+    a = ldul_be_p(ptr);
+    b = ldul_be_p(ptr+4);
     return (((uint64_t)a<<32)|b);
 }
 
@@ -464,7 +534,7 @@
         float32 f;
         uint32_t i;
     } u;
-    u.i = ldl_be_p(ptr);
+    u.i = ldul_be_p(ptr);
     return u.f;
 }
 
@@ -481,8 +551,8 @@
 static inline float64 ldfq_be_p(void *ptr)
 {
     CPU_DoubleU u;
-    u.l.upper = ldl_be_p(ptr);
-    u.l.lower = ldl_be_p(ptr + 4);
+    u.l.upper = ldul_be_p(ptr);
+    u.l.lower = ldul_be_p(ptr + 4);
     return u.d;
 }
 
@@ -506,11 +576,23 @@
     return *(int16_t *)ptr;
 }
 
-static inline int ldl_be_p(void *ptr)
+#if (TARGET_LONG_BITS == 64)
+static inline int64_t ldul_be_p(void *ptr)
 {
     return *(uint32_t *)ptr;
 }
 
+static inline int64_t ldsl_be_p(void *ptr)
+{
+    return *(int32_t *)ptr;
+}
+#else
+static inline int ldul_be_p(void *ptr)
+{
+    return *(uint32_t *)ptr;
+}
+#endif
+
 static inline uint64_t ldq_be_p(void *ptr)
 {
     return *(uint64_t *)ptr;
@@ -557,9 +639,13 @@
 
 /* target CPU memory access functions */
 #if defined(TARGET_WORDS_BIGENDIAN)
+/* native-endian */
 #define lduw_p(p) lduw_be_p(p)
 #define ldsw_p(p) ldsw_be_p(p)
-#define ldl_p(p) ldl_be_p(p)
+#define ldul_p(p) ldul_be_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_p(p) ldsl_be_p(p)
+#endif
 #define ldq_p(p) ldq_be_p(p)
 #define ldfl_p(p) ldfl_be_p(p)
 #define ldfq_p(p) ldfq_be_p(p)
@@ -568,10 +654,29 @@
 #define stq_p(p, v) stq_be_p(p, v)
 #define stfl_p(p, v) stfl_be_p(p, v)
 #define stfq_p(p, v) stfq_be_p(p, v)
+/* reverse-endian */
+#define lduwr_p(p) lduw_le_p(p)
+#define ldswr_p(p) ldsw_le_p(p)
+#define ldulr_p(p) ldul_le_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_p(p) ldsl_le_p(p)
+#endif
+#define ldqr_p(p) ldq_le_p(p)
+#define ldflr_p(p) ldfl_le_p(p)
+#define ldfqr_p(p) ldfq_le_p(p)
+#define stwr_p(p, v) stw_le_p(p, v)
+#define stlr_p(p, v) stl_le_p(p, v)
+#define stqr_p(p, v) stq_le_p(p, v)
+#define stflr_p(p, v) stfl_le_p(p, v)
+#define stfqr_p(p, v) stfq_le_p(p, v)
 #else
+/* native-endian */
 #define lduw_p(p) lduw_le_p(p)
 #define ldsw_p(p) ldsw_le_p(p)
-#define ldl_p(p) ldl_le_p(p)
+#define ldul_p(p) ldul_le_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_p(p) ldsl_le_p(p)
+#endif
 #define ldq_p(p) ldq_le_p(p)
 #define ldfl_p(p) ldfl_le_p(p)
 #define ldfq_p(p) ldfq_le_p(p)
@@ -580,6 +685,21 @@
 #define stq_p(p, v) stq_le_p(p, v)
 #define stfl_p(p, v) stfl_le_p(p, v)
 #define stfq_p(p, v) stfq_le_p(p, v)
+/* reverse-endian */
+#define lduwr_p(p) lduw_be_p(p)
+#define ldswr_p(p) ldsw_be_p(p)
+#define ldulr_p(p) ldul_be_p(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_p(p) ldsl_be_p(p)
+#endif
+#define ldqr_p(p) ldq_be_p(p)
+#define ldflr_p(p) ldfl_be_p(p)
+#define ldfqr_p(p) ldfq_be_p(p)
+#define stwr_p(p, v) stw_be_p(p, v)
+#define stlr_p(p, v) stl_be_p(p, v)
+#define stqr_p(p, v) stq_be_p(p, v)
+#define stflr_p(p, v) stfl_be_p(p, v)
+#define stfqr_p(p, v) stfq_be_p(p, v)
 #endif
 
 /* MMU memory access macros */
@@ -605,11 +725,15 @@
 #define laddr(x) (uint8_t *)(long)(x)
 #endif
 
+/* native-endian */
 #define ldub_raw(p) ldub_p(laddr((p)))
 #define ldsb_raw(p) ldsb_p(laddr((p)))
 #define lduw_raw(p) lduw_p(laddr((p)))
 #define ldsw_raw(p) ldsw_p(laddr((p)))
-#define ldl_raw(p) ldl_p(laddr((p)))
+#define ldul_raw(p) ldul_p(laddr((p)))
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_raw(p) ldsl_p(laddr((p)))
+#endif
 #define ldq_raw(p) ldq_p(laddr((p)))
 #define ldfl_raw(p) ldfl_p(laddr((p)))
 #define ldfq_raw(p) ldfq_p(laddr((p)))
@@ -619,16 +743,112 @@
 #define stq_raw(p, v) stq_p(saddr((p)), v)
 #define stfl_raw(p, v) stfl_p(saddr((p)), v)
 #define stfq_raw(p, v) stfq_p(saddr((p)), v)
-
+/* reverse endian */
+#define ldubr_raw(p) ldub_p(laddr((p)))
+#define ldsbr_raw(p) ldsb_p(laddr((p)))
+#define lduwr_raw(p) lduwr_p(laddr((p)))
+#define ldswr_raw(p) ldswr_p(laddr((p)))
+#define ldulr_raw(p) ldulr_p(laddr((p)))
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_raw(p) ldslr_p(laddr((p)))
+#endif
+#define ldqr_raw(p) ldqr_p(laddr((p)))
+#define ldflr_raw(p) ldflr_p(laddr((p)))
+#define ldfqr_raw(p) ldfqr_p(laddr((p)))
+#define stbr_raw(p, v) stb_p(saddr((p)), v)
+#define stwr_raw(p, v) stwr_p(saddr((p)), v)
+#define stlr_raw(p, v) stlr_p(saddr((p)), v)
+#define stqr_raw(p, v) stqr_p(saddr((p)), v)
+#define stflr_raw(p, v) stflr_p(saddr((p)), v)
+#define stfqr_raw(p, v) stfqr_p(saddr((p)), v)
+#if defined(TARGET_WORDS_BIGENDIAN)
+/* big-endian */
+#define ldub_be_raw(p) ldub_raw(p)
+#define ldsb_be_raw(p) ldsb_raw(p)
+#define lduw_be_raw(p) lduw_raw(p)
+#define ldsw_be_raw(p) ldsw_raw(p)
+#define ldul_be_raw(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_raw(p) ldsl_raw(p)
+#endif
+#define ldq_be_raw(p) ldq_raw(p)
+#define ldfl_be_raw(p) ldfl_raw(p)
+#define ldfq_be_raw(p) ldfq_raw(p)
+#define stb_be_raw(p, v) stb_raw(p, v)
+#define stw_be_raw(p, v) stw_raw(p, v)
+#define stl_be_raw(p, v) stl_raw(p, v)
+#define stq_be_raw(p, v) stq_raw(p, v)
+#define stfl_be_raw(p, v) stfl_raw(p, v)
+#define stfq_be_raw(p, v) stfq_raw(p, v)
+/* little-endian */
+#define ldub_le_raw(p) ldubr_raw(p)
+#define ldsb_le_raw(p) ldsbr_raw(p)
+#define lduw_le_raw(p) lduwr_raw(p)
+#define ldsw_le_raw(p) ldswr_raw(p)
+#define ldul_le_raw(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_raw(p) ldslr_raw(p)
+#endif
+#define ldq_le_raw(p) ldqr_raw(p)
+#define ldfl_le_raw(p) ldflr_raw(p)
+#define ldfq_le_raw(p) ldfqr_raw(p)
+#define stb_le_raw(p, v) stbr_raw(p, v)
+#define stw_le_raw(p, v) stwr_raw(p, v)
+#define stl_le_raw(p, v) stlr_raw(p, v)
+#define stq_le_raw(p, v) stqr_raw(p, v)
+#define stfl_le_raw(p, v) stflr_raw(p, v)
+#define stfq_le_raw(p, v) stfqr_raw(p, v)
+#else
+/* big-endian */
+#define ldub_be_raw(p) ldubr_raw(p)
+#define ldsb_be_raw(p) ldsbr_raw(p)
+#define lduw_be_raw(p) lduwr_raw(p)
+#define ldsw_be_raw(p) ldswr_raw(p)
+#define ldul_be_raw(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_raw(p) ldslr_raw(p)
+#endif
+#define ldq_be_raw(p) ldqr_raw(p)
+#define ldfl_be_raw(p) ldflr_raw(p)
+#define ldfq_be_raw(p) ldfqr_raw(p)
+#define stb_be_raw(p, v) stbr_raw(p, v)
+#define stw_be_raw(p, v) stwr_raw(p, v)
+#define stl_be_raw(p, v) stlr_raw(p, v)
+#define stq_be_raw(p, v) stqr_raw(p, v)
+#define stfl_be_raw(p, v) stflr_raw(p, v)
+#define stfq_be_raw(p, v) stfqr_raw(p, v)
+/* little-endian */
+#define ldub_le_raw(p) ldub_raw(p)
+#define ldsb_le_raw(p) ldsb_raw(p)
+#define lduw_le_raw(p) lduw_raw(p)
+#define ldsw_le_raw(p) ldsw_raw(p)
+#define ldul_le_raw(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_raw(p) ldsl_raw(p)
+#endif
+#define ldq_le_raw(p) ldq_raw(p)
+#define ldfl_le_raw(p) ldfl_raw(p)
+#define ldfq_le_raw(p) ldfq_raw(p)
+#define stb_le_raw(p, v) stb_raw(p, v)
+#define stw_le_raw(p, v) stw_raw(p, v)
+#define stl_le_raw(p, v) stl_raw(p, v)
+#define stq_le_raw(p, v) stq_raw(p, v)
+#define stfl_le_raw(p, v) stfl_raw(p, v)
+#define stfq_le_raw(p, v) stfq_raw(p, v)
+#endif
 
 #if defined(CONFIG_USER_ONLY)
 
 /* if user mode, no other memory access functions */
+/* native-endian */
 #define ldub(p) ldub_raw(p)
 #define ldsb(p) ldsb_raw(p)
 #define lduw(p) lduw_raw(p)
 #define ldsw(p) ldsw_raw(p)
-#define ldl(p) ldl_raw(p)
+#define ldul(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl(p) ldsl_raw(p)
+#endif
 #define ldq(p) ldq_raw(p)
 #define ldfl(p) ldfl_raw(p)
 #define ldfq(p) ldfq_raw(p)
@@ -638,19 +858,173 @@
 #define stq(p, v) stq_raw(p, v)
 #define stfl(p, v) stfl_raw(p, v)
 #define stfq(p, v) stfq_raw(p, v)
+/* reverse-endian */
+#define ldubr(p) ldub_raw(p)
+#define ldsbr(p) ldsb_raw(p)
+#define lduwr(p) lduwr_raw(p)
+#define ldswr(p) ldswr_raw(p)
+#define ldulr(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr(p) ldslr_raw(p)
+#endif
+#define ldqr(p) ldqr_raw(p)
+#define ldflr(p) ldflr_raw(p)
+#define ldfqr(p) ldfqr_raw(p)
+#define stbr(p, v) stb_raw(p, v)
+#define stwr(p, v) stwr_raw(p, v)
+#define stlr(p, v) stlr_raw(p, v)
+#define stqr(p, v) stqr_raw(p, v)
+#define stflr(p, v) stflr_raw(p, v)
+#define stfqr(p, v) stfqr_raw(p, v)
+#if defined(TARGET_WORDS_BIGENDIAN)
+/* big-endian */
+#define ldub_be(p) ldub(p)
+#define ldsb_be(p) ldsb(p)
+#define lduw_be(p) lduw(p)
+#define ldsw_be(p) ldsw(p)
+#define ldul_be(p) ldul(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be(p) ldsl(p)
+#endif
+#define ldq_be(p) ldq(p)
+#define ldfl_be(p) ldfl(p)
+#define ldfq_be(p) ldfq(p)
+#define stb_be(p, v) stb(p, v)
+#define stw_be(p, v) stw(p, v)
+#define stl_be(p, v) stl(p, v)
+#define stq_be(p, v) stq(p, v)
+#define stfl_be(p, v) stfl(p, v)
+#define stfq_be(p, v) stfq(p, v)
+/* little-endian */
+#define ldub_le(p) ldubr(p)
+#define ldsb_le(p) ldsbr(p)
+#define lduw_le(p) lduwr(p)
+#define ldsw_le(p) ldswr(p)
+#define ldul_le(p) ldulr(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le(p) ldslr(p)
+#endif
+#define ldq_le(p) ldqr(p)
+#define ldfl_le(p) ldflr(p)
+#define ldfq_le(p) ldfqr(p)
+#define stb_le(p, v) stbr(p, v)
+#define stw_le(p, v) stwr(p, v)
+#define stl_le(p, v) stlr(p, v)
+#define stq_le(p, v) stqr(p, v)
+#define stfl_le(p, v) stflr(p, v)
+#define stfq_le(p, v) stfqr(p, v)
+#else
+/* big-endian */
+#define ldub_be(p) ldubr(p)
+#define ldsb_be(p) ldsbr(p)
+#define lduw_be(p) lduwr(p)
+#define ldsw_be(p) ldswr(p)
+#define ldul_be(p) ldulr(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be(p) ldslr(p)
+#endif
+#define ldq_be(p) ldqr(p)
+#define ldfl_be(p) ldflr(p)
+#define ldfq_be(p) ldfqr(p)
+#define stb_be(p, v) stbr(p, v)
+#define stw_be(p, v) stwr(p, v)
+#define stl_be(p, v) stlr(p, v)
+#define stq_be(p, v) stqr(p, v)
+#define stfl_be(p, v) stflr(p, v)
+#define stfq_be(p, v) stfqr(p, v)
+/* little-endian */
+#define ldub_le(p) ldub(p)
+#define ldsb_le(p) ldsb(p)
+#define lduw_le(p) lduw(p)
+#define ldsw_le(p) ldsw(p)
+#define ldul_le(p) ldul(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le(p) ldsl(p)
+#endif
+#define ldq_le(p) ldq(p)
+#define ldfl_le(p) ldfl(p)
+#define ldfq_le(p) ldfq(p)
+#define stb_le(p, v) stb(p, v)
+#define stw_le(p, v) stw(p, v)
+#define stl_le(p, v) stl(p, v)
+#define stq_le(p, v) stq(p, v)
+#define stfl_le(p, v) stfl(p, v)
+#define stfq_le(p, v) stfq(p, v)
+#endif
 
+/* native-endian */
 #define ldub_code(p) ldub_raw(p)
 #define ldsb_code(p) ldsb_raw(p)
 #define lduw_code(p) lduw_raw(p)
 #define ldsw_code(p) ldsw_raw(p)
-#define ldl_code(p) ldl_raw(p)
+#define ldul_code(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_code(p) ldsl_raw(p)
+#endif
 #define ldq_code(p) ldq_raw(p)
+/* reverse-endian */
+#define ldubr_code(p) ldub_raw(p)
+#define ldsbr_code(p) ldsb_raw(p)
+#define lduwr_code(p) lduwr_raw(p)
+#define ldswr_code(p) ldswr_raw(p)
+#define ldulr_code(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_code(p) ldslr_raw(p)
+#endif
+#define ldqr_code(p) ldqr_raw(p)
+#if defined(TARGET_WORDS_BIGENDIAN)
+/* big-endian */
+#define ldub_be_code(p) ldub_code(p)
+#define ldsb_be_code(p) ldsb_code(p)
+#define lduw_be_code(p) lduw_code(p)
+#define ldsw_be_code(p) ldsw_code(p)
+#define ldul_be_code(p) ldul_code(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_code(p) ldsl_code(p)
+#endif
+#define ldq_be_code(p) ldq_code(p)
+/* little-endian */
+#define ldub_le_code(p) ldubr_code(p)
+#define ldsb_le_code(p) ldsbr_code(p)
+#define lduw_le_code(p) lduwr_code(p)
+#define ldsw_le_code(p) ldswr_code(p)
+#define ldul_le_code(p) ldulr_code(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_code(p) ldslr_code(p)
+#endif
+#define ldq_le_code(p) ldqr_code(p)
+#else
+/* big-endian */
+#define ldub_be_code(p) ldubr_code(p)
+#define ldsb_be_code(p) ldsbr_code(p)
+#define lduw_be_code(p) lduwr_code(p)
+#define ldsw_be_code(p) ldswr_code(p)
+#define ldul_be_code(p) ldulr_code(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_code(p) ldslr_code(p)
+#endif
+#define ldq_be_code(p) ldqr_code(p)
+/* little-endian */
+#define ldub_le_code(p) ldub_code(p)
+#define ldsb_le_code(p) ldsb_code(p)
+#define lduw_le_code(p) lduw_code(p)
+#define ldsw_le_code(p) ldsw_code(p)
+#define ldul_le_code(p) ldul_code(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_code(p) ldsl_code(p)
+#endif
+#define ldq_le_code(p) ldq_code(p)
+#endif
 
+/* native-endian */
 #define ldub_kernel(p) ldub_raw(p)
 #define ldsb_kernel(p) ldsb_raw(p)
 #define lduw_kernel(p) lduw_raw(p)
 #define ldsw_kernel(p) ldsw_raw(p)
-#define ldl_kernel(p) ldl_raw(p)
+#define ldul_kernel(p) ldul_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_kernel(p) ldsl_raw(p)
+#endif
 #define ldq_kernel(p) ldq_raw(p)
 #define ldfl_kernel(p) ldfl_raw(p)
 #define ldfq_kernel(p) ldfq_raw(p)
@@ -660,6 +1034,99 @@
 #define stq_kernel(p, v) stq_raw(p, v)
 #define stfl_kernel(p, v) stfl_raw(p, v)
 #define stfq_kernel(p, vt) stfq_raw(p, v)
+/* reverse-endian */
+#define ldubr_kernel(p) ldub_raw(p)
+#define ldsbr_kernel(p) ldsb_raw(p)
+#define lduwr_kernel(p) lduwr_raw(p)
+#define ldswr_kernel(p) ldswr_raw(p)
+#define ldulr_kernel(p) ldulr_raw(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldslr_kernel(p) ldslr_raw(p)
+#endif
+#define ldqr_kernel(p) ldqr_raw(p)
+#define ldflr_kernel(p) ldflr_raw(p)
+#define ldfqr_kernel(p) ldfqr_raw(p)
+#define stbr_kernel(p, v) stbr_raw(p, v)
+#define stwr_kernel(p, v) stwr_raw(p, v)
+#define stlr_kernel(p, v) stlr_raw(p, v)
+#define stqr_kernel(p, v) stqr_raw(p, v)
+#define stflr_kernel(p, v) stflr_raw(p, v)
+#define stfqr_kernel(p, vt) stfqr_raw(p, v)
+#if defined(TARGET_WORDS_BIGENDIAN)
+/* big-endian */
+#define ldub_be_kernel(p) ldub_kernel(p)
+#define ldsb_be_kernel(p) ldsb_kernel(p)
+#define lduw_be_kernel(p) lduw_kernel(p)
+#define ldsw_be_kernel(p) ldsw_kernel(p)
+#define ldul_be_kernel(p) ldul_kernel(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_kernel(p) ldsl_kernel(p)
+#endif
+#define ldq_be_kernel(p) ldq_kernel(p)
+#define ldfl_be_kernel(p) ldfl_kernel(p)
+#define ldfq_be_kernel(p) ldfq_kernel(p)
+#define stb_be_kernel(p, v) stb_kernel(p, v)
+#define stw_be_kernel(p, v) stw_kernel(p, v)
+#define stl_be_kernel(p, v) stl_kernel(p, v)
+#define stq_be_kernel(p, v) stq_kernel(p, v)
+#define stfl_be_kernel(p, v) stfl_kernel(p, v)
+#define stfq_be_kernel(p, vt) stfq_kernel(p, vt)
+/* little-endian */
+#define ldub_le_kernel(p) ldubr_kernel(p)
+#define ldsb_le_kernel(p) ldsbr_kernel(p)
+#define lduw_le_kernel(p) lduwr_kernel(p)
+#define ldsw_le_kernel(p) ldswr_kernel(p)
+#define ldul_le_kernel(p) ldulr_kernel(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_kernel(p) ldslr_kernel(p)
+#endif
+#define ldq_le_kernel(p) ldqr_kernel(p)
+#define ldfl_le_kernel(p) ldflr_kernel(p)
+#define ldfq_le_kernel(p) ldfqr_kernel(p)
+#define stb_le_kernel(p, v) stbr_kernel(p, v)
+#define stw_le_kernel(p, v) stwr_kernel(p, v)
+#define stl_le_kernel(p, v) stlr_kernel(p, v)
+#define stq_le_kernel(p, v) stqr_kernel(p, v)
+#define stfl_le_kernel(p, v) stflr_kernel(p, v)
+#define stfq_le_kernel(p, vt) stfqr_kernel(p, vt)
+#else
+/* big-endian */
+#define ldub_be_kernel(p) ldubr_kernel(p)
+#define ldsb_be_kernel(p) ldsbr_kernel(p)
+#define lduw_be_kernel(p) lduwr_kernel(p)
+#define ldsw_be_kernel(p) ldswr_kernel(p)
+#define ldul_be_kernel(p) ldulr_kernel(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_be_kernel(p) ldslr_kernel(p)
+#endif
+#define ldq_be_kernel(p) ldqr_kernel(p)
+#define ldfl_be_kernel(p) ldflr_kernel(p)
+#define ldfq_be_kernel(p) ldfqr_kernel(p)
+#define stb_be_kernel(p, v) stbr_kernel(p, v)
+#define stw_be_kernel(p, v) stwr_kernel(p, v)
+#define stl_be_kernel(p, v) stlr_kernel(p, v)
+#define stq_be_kernel(p, v) stqr_kernel(p, v)
+#define stfl_be_kernel(p, v) stflr_kernel(p, v)
+#define stfq_be_kernel(p, vt) stfqr_kernel(p, vt)
+/* little-endian */
+#define ldub_le_kernel(p) ldub_kernel(p)
+#define ldsb_le_kernel(p) ldsb_kernel(p)
+#define lduw_le_kernel(p) lduw_kernel(p)
+#define ldsw_le_kernel(p) ldsw_kernel(p)
+#define ldul_le_kernel(p) ldul_kernel(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl_le_kernel(p) ldsl_kernel(p)
+#endif
+#define ldq_le_kernel(p) ldq_kernel(p)
+#define ldfl_le_kernel(p) ldfl_kernel(p)
+#define ldfq_le_kernel(p) ldfq_kernel(p)
+#define stb_le_kernel(p, v) stb_kernel(p, v)
+#define stw_le_kernel(p, v) stw_kernel(p, v)
+#define stl_le_kernel(p, v) stl_kernel(p, v)
+#define stq_le_kernel(p, v) stq_kernel(p, v)
+#define stfl_le_kernel(p, v) stfl_kernel(p, v)
+#define stfq_le_kernel(p, vt) stfq_kernel(p, vt)
+#endif
 
 #endif /* defined(CONFIG_USER_ONLY) */
 
@@ -790,6 +1257,8 @@
    the physical address */
 #define IO_MEM_ROMD        (1)
 #define IO_MEM_SUBPAGE     (2)
+/* On some target CPUs, endianness is stored in page tables */
+#define IO_MEM_REVERSE     (3)
 
 typedef void CPUWriteMemoryFunc(void *opaque, target_phys_addr_t addr, uint32_t value);
 typedef uint32_t CPUReadMemoryFunc(void *opaque, target_phys_addr_t addr);
@@ -821,7 +1290,7 @@
 }
 uint32_t ldub_phys(target_phys_addr_t addr);
 uint32_t lduw_phys(target_phys_addr_t addr);
-uint32_t ldl_phys(target_phys_addr_t addr);
+uint32_t ldul_phys(target_phys_addr_t addr);
 uint64_t ldq_phys(target_phys_addr_t addr);
 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val);
 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val);
Index: cpu-exec.c
===================================================================
--- cpu-exec.c.orig	2007-10-14 16:44:20.000000000 +0000
+++ cpu-exec.c	2007-10-15 15:41:09.000000000 +0000
@@ -436,12 +436,12 @@
                          /* FIXME: this should respect TPR */
                          env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
                          svm_check_intercept(SVM_EXIT_VINTR);
-                         intno = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector));
+                         intno = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector));
                          if (loglevel & CPU_LOG_TB_IN_ASM)
                              fprintf(logfile, "Servicing virtual hardware INT=0x%02x\n", intno);
 	                 do_interrupt(intno, 0, 0, -1, 1);
                          stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl),
-                                  ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)) & ~V_IRQ_MASK);
+                                  ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)) & ~V_IRQ_MASK);
 #if defined(__sparc__) && !defined(HOST_SOLARIS)
                          tmp_T0 = 0;
 #else
Index: exec-all.h
===================================================================
--- exec-all.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ exec-all.h	2007-10-15 15:41:09.000000000 +0000
@@ -569,6 +569,7 @@
 #define MEMSUFFIX _code
 #define env cpu_single_env
 
+/* native-endian */
 #define DATA_SIZE 1
 #include "softmmu_header.h"
 
@@ -581,6 +582,21 @@
 #define DATA_SIZE 8
 #include "softmmu_header.h"
 
+/* reverse-endian */
+#define REVERSE_ENDIAN
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+#undef REVERSE_ENDIAN
+
 #undef ACCESS_TYPE
 #undef MEMSUFFIX
 #undef env
Index: exec.c
===================================================================
--- exec.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ exec.c	2007-10-15 15:41:09.000000000 +0000
@@ -2202,7 +2202,7 @@
 
 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
 {
-    return ldl_phys(addr);
+    return ldul_phys(addr);
 }
 
 /* Generate a debug exception if a watchpoint has been hit.
@@ -2507,7 +2507,7 @@
     uint8_t *ptr;
     uint32_t val;
     target_phys_addr_t page;
-    unsigned long pd;
+    unsigned long pd, addr1;
     PhysPageDesc *p;
 
     while (len > 0) {
@@ -2524,31 +2524,54 @@
 
         if (is_write) {
             if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
-                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
-                /* XXX: could force cpu_single_env to NULL to avoid
-                   potential bugs */
-                if (l >= 4 && ((addr & 3) == 0)) {
-                    /* 32 bit write access */
-                    val = ldl_p(buf);
-                    io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
-                    l = 4;
-                } else if (l >= 2 && ((addr & 1) == 0)) {
-                    /* 16 bit write access */
-                    val = lduw_p(buf);
-                    io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
-                    l = 2;
+                if (pd & IO_MEM_REVERSE) {
+                    /* Specific case for reverse endian page write */
+                    addr1 = (pd & TARGET_PAGE_MASK) +
+                        (addr & ~TARGET_PAGE_MASK);
+                    ptr = phys_ram_base + addr1;
+                    for (; l >= 4; l -= 4) {
+                        stlr_p(ptr, *(uint32_t *)buf);
+                        ptr += 4;
+                        buf += 4;
+                    }
+                    for (; l >= 2; l -= 2) {
+                        stwr_p(ptr, *(uint16_t *)buf);
+                        ptr += 2;
+                        buf += 2;
+                    }
+                    if (l >= 1)
+                        *ptr = *buf;
+                    goto invalidate_code;
                 } else {
-                    /* 8 bit write access */
-                    val = ldub_p(buf);
-                    io_mem_write[io_index][0](io_mem_opaque[io_index], addr, val);
-                    l = 1;
+                    io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+                    /* XXX: could force cpu_single_env to NULL to avoid
+                       potential bugs */
+                    if (l >= 4 && ((addr & 3) == 0)) {
+                        /* 32 bit write access */
+                        val = ldul_p(buf);
+                        io_mem_write[io_index][2](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 4;
+                    } else if (l >= 2 && ((addr & 1) == 0)) {
+                        /* 16 bit write access */
+                        val = lduw_p(buf);
+                        io_mem_write[io_index][1](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 2;
+                    } else {
+                        /* 8 bit write access */
+                        val = ldub_p(buf);
+                        io_mem_write[io_index][0](io_mem_opaque[io_index],
+                                                  addr, val);
+                        l = 1;
+                    }
                 }
             } else {
-                unsigned long addr1;
                 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
                 /* RAM case */
                 ptr = phys_ram_base + addr1;
                 memcpy(ptr, buf, l);
+            invalidate_code:
                 if (!cpu_physical_memory_is_dirty(addr1)) {
                     /* invalidate code */
                     tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
@@ -2560,23 +2583,45 @@
         } else {
             if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
                 !(pd & IO_MEM_ROMD)) {
-                /* I/O case */
-                io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
-                if (l >= 4 && ((addr & 3) == 0)) {
-                    /* 32 bit read access */
-                    val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
-                    stl_p(buf, val);
-                    l = 4;
-                } else if (l >= 2 && ((addr & 1) == 0)) {
-                    /* 16 bit read access */
-                    val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
-                    stw_p(buf, val);
-                    l = 2;
+                if (pd & IO_MEM_REVERSE) {
+                    /* Specific case for reverse endian page read */
+                    addr1 = (pd & TARGET_PAGE_MASK) +
+                        (addr & ~TARGET_PAGE_MASK);
+                    ptr = phys_ram_base + addr1;
+                    for (; l >= 4; l -= 4) {
+                        *(uint32_t *)buf = ldulr_p(ptr);
+                        ptr += 4;
+                        buf += 4;
+                    }
+                    for (; l >= 2; l -= 2) {
+                        *(uint16_t *)buf = lduwr_p(ptr);
+                        ptr += 2;
+                        buf += 2;
+                    }
+                    if (l >= 1)
+                        *buf = *ptr;
                 } else {
-                    /* 8 bit read access */
-                    val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr);
-                    stb_p(buf, val);
-                    l = 1;
+                    /* I/O case */
+                    io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+                    if (l >= 4 && ((addr & 3) == 0)) {
+                        /* 32 bit read access */
+                        val = io_mem_read[io_index][2](io_mem_opaque[io_index],
+                                                       addr);
+                        stl_p(buf, val);
+                        l = 4;
+                    } else if (l >= 2 && ((addr & 1) == 0)) {
+                        /* 16 bit read access */
+                        val = io_mem_read[io_index][1](io_mem_opaque[io_index],
+                                                       addr);
+                        stw_p(buf, val);
+                        l = 2;
+                    } else {
+                        /* 8 bit read access */
+                        val = io_mem_read[io_index][0](io_mem_opaque[io_index],
+                                                       addr);
+                        stb_p(buf, val);
+                        l = 1;
+                    }
                 }
             } else {
                 /* RAM case */
@@ -2632,7 +2677,7 @@
 
 
 /* warning: addr must be aligned */
-uint32_t ldl_phys(target_phys_addr_t addr)
+uint32_t ldul_phys(target_phys_addr_t addr)
 {
     int io_index;
     uint8_t *ptr;
@@ -2656,7 +2701,7 @@
         /* RAM case */
         ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) +
             (addr & ~TARGET_PAGE_MASK);
-        val = ldl_p(ptr);
+        val = ldul_p(ptr);
     }
     return val;
 }
@@ -2907,6 +2952,21 @@
 #define env cpu_single_env
 #define SOFTMMU_CODE_ACCESS
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -2918,6 +2978,7 @@
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 #undef env
 
Index: monitor.c
===================================================================
--- monitor.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ monitor.c	2007-10-15 15:41:09.000000000 +0000
@@ -595,7 +595,7 @@
                 v = lduw_raw(buf + i);
                 break;
             case 4:
-                v = (uint32_t)ldl_raw(buf + i);
+                v = (uint32_t)ldul_raw(buf + i);
                 break;
             case 8:
                 v = ldq_raw(buf + i);
Index: softmmu_exec.h
===================================================================
--- softmmu_exec.h.orig	2007-10-14 13:00:06.000000000 +0000
+++ softmmu_exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -1,13 +1,7 @@
 /* Common softmmu definitions and inline routines.  */
 
-/* XXX: find something cleaner.
- * Furthermore, this is false for 64 bits targets
- */
-#define ldul_user       ldl_user
-#define ldul_kernel     ldl_kernel
-#define ldul_hypv       ldl_hypv
-#define ldul_executive  ldl_executive
-#define ldul_supervisor ldl_supervisor
+#define lduq_user ldq_user
+#define lduq_kernel ldq_kernel
 
 #define ACCESS_TYPE 0
 #define MEMSUFFIX MMU_MODE0_SUFFIX
@@ -104,7 +98,10 @@
 #define ldsb(p) ldsb_data(p)
 #define lduw(p) lduw_data(p)
 #define ldsw(p) ldsw_data(p)
-#define ldl(p) ldl_data(p)
+#define ldul(p) ldul_data(p)
+#if (TARGET_LONG_BITS == 64)
+#define ldsl(p) ldsl_data(p)
+#endif
 #define ldq(p) ldq_data(p)
 
 #define stb(p, v) stb_data(p, v)
Index: softmmu_header.h
===================================================================
--- softmmu_header.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ softmmu_header.h	2007-10-15 15:41:10.000000000 +0000
@@ -17,27 +17,86 @@
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
+
+#if !defined(REVERSE_ENDIAN)
+/* native-endian */
+#if defined(TARGET_WORDS_BIGENDIAN)
+#define ESUFFIX _be
+#else
+#define ESUFFIX _le
+#endif
 #if DATA_SIZE == 8
 #define SUFFIX q
 #define USUFFIX q
+#define LSUFFIX q
+#define LUSUFFIX q
 #define DATA_TYPE uint64_t
 #elif DATA_SIZE == 4
 #define SUFFIX l
-#define USUFFIX l
+#define USUFFIX ul
+#define LSUFFIX l
+#define LUSUFFIX ul
+#if (TARGET_LONG_BITS == 64)
+#define DATA_STYPE int32_t
+#endif
 #define DATA_TYPE uint32_t
 #elif DATA_SIZE == 2
 #define SUFFIX w
 #define USUFFIX uw
+#define LSUFFIX w
+#define LUSUFFIX uw
 #define DATA_TYPE uint16_t
 #define DATA_STYPE int16_t
 #elif DATA_SIZE == 1
 #define SUFFIX b
 #define USUFFIX ub
+#define LSUFFIX b
+#define LUSUFFIX ub
+#define DATA_TYPE uint8_t
+#define DATA_STYPE int8_t
+#else
+#error unsupported data size
+#endif
+#else /* defined(REVERSE_ENDIAN) */
+/* reverse-endian */
+#if defined(TARGET_WORDS_BIGENDIAN)
+#define ESUFFIX _le
+#else
+#define ESUFFIX _be
+#endif
+#if DATA_SIZE == 8
+#define SUFFIX qr
+#define USUFFIX qr
+#define LSUFFIX q
+#define LUSUFFIX q
+#define DATA_TYPE uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX lr
+#define USUFFIX ulr
+#define LSUFFIX l
+#define LUSUFFIX ul
+#if (TARGET_LONG_BITS == 64)
+#define DATA_STYPE int32_t
+#endif
+#define DATA_TYPE uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX wr
+#define USUFFIX uwr
+#define LSUFFIX w
+#define LUSUFFIX uw
+#define DATA_TYPE uint16_t
+#define DATA_STYPE int16_t
+#elif DATA_SIZE == 1
+#define SUFFIX br
+#define USUFFIX ubr
+#define LSUFFIX b
+#define LUSUFFIX ub
 #define DATA_TYPE uint8_t
 #define DATA_STYPE int8_t
 #else
 #error unsupported data size
 #endif
+#endif /* defined(REVERSE_ENDIAN) */
 
 #if ACCESS_TYPE < (NB_MMU_MODES)
 
@@ -121,7 +180,7 @@
     return res;
 }
 
-#if DATA_SIZE <= 2
+#if defined(DATA_STYPE)
 static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
 {
     int res;
@@ -244,7 +303,7 @@
     return res;
 }
 
-#if DATA_SIZE <= 2
+#if defined(DATA_STYPE)
 static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
 {
     int res, index;
@@ -293,8 +352,29 @@
 
 #endif /* !asm */
 
+/* BE/LE access routines */
+static inline RES_TYPE glue(glue(glue(ld, LUSUFFIX), ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(glue(ld, USUFFIX), MEMSUFFIX)(ptr);
+}
+
+#if defined(DATA_STYPE)
+static inline RES_TYPE glue(glue(glue(lds, LSUFFIX), ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(glue(lds, SUFFIX), MEMSUFFIX)(ptr);
+}
+#endif
+
+#if ACCESS_TYPE != (NB_MMU_MODES + 1) /* same guard as the float accessors below */
+static inline void glue(glue(glue(st, LSUFFIX), ESUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v)
+{
+    glue(glue(st, SUFFIX), MEMSUFFIX)(ptr, v); /* forward to the plain store accessor */
+}
+#endif
+
 #if ACCESS_TYPE != (NB_MMU_MODES + 1)
 
+#if !defined(REVERSE_ENDIAN)
 #if DATA_SIZE == 8
 static inline float64 glue(ldfq, MEMSUFFIX)(target_ulong ptr)
 {
@@ -306,6 +386,11 @@
     return u.d;
 }
 
+static inline float64 glue(glue(ldfq, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldfq, MEMSUFFIX)(ptr);
+}
+
 static inline void glue(stfq, MEMSUFFIX)(target_ulong ptr, float64 v)
 {
     union {
@@ -315,6 +400,12 @@
     u.d = v;
     glue(stq, MEMSUFFIX)(ptr, u.i);
 }
+
+static inline void glue(glue(stfq, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                        float64 v)
+{
+    glue(stfq, MEMSUFFIX)(ptr, v);
+}
 #endif /* DATA_SIZE == 8 */
 
 #if DATA_SIZE == 4
@@ -324,10 +415,15 @@
         float32 f;
         uint32_t i;
     } u;
-    u.i = glue(ldl, MEMSUFFIX)(ptr);
+    u.i = glue(ldul, MEMSUFFIX)(ptr);
     return u.f;
 }
 
+static inline float32 glue(glue(ldfl, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldfl, MEMSUFFIX)(ptr);
+}
+
 static inline void glue(stfl, MEMSUFFIX)(target_ulong ptr, float32 v)
 {
     union {
@@ -337,8 +433,84 @@
     u.f = v;
     glue(stl, MEMSUFFIX)(ptr, u.i);
 }
+
+static inline void glue(glue(stfl, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                        float32 v)
+{
+    glue(stfl, MEMSUFFIX)(ptr, v);
+}
 #endif /* DATA_SIZE == 4 */
 
+#else /* defined(REVERSE_ENDIAN) */
+
+#if DATA_SIZE == 8
+static inline float64 glue(ldfqr, MEMSUFFIX)(target_ulong ptr)
+{
+    union {
+        float64 d;
+        uint64_t i;
+    } u;
+    u.i = glue(ldqr, MEMSUFFIX)(ptr);
+    return u.d;
+}
+
+static inline float64 glue(glue(ldfqr, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldfqr, MEMSUFFIX)(ptr);
+}
+
+static inline void glue(stfqr, MEMSUFFIX)(target_ulong ptr, float64 v)
+{
+    union {
+        float64 d;
+        uint64_t i;
+    } u;
+    u.d = v;
+    glue(stqr, MEMSUFFIX)(ptr, u.i);
+}
+
+static inline void glue(glue(stfqr, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                         float64 v)
+{
+    glue(stfqr, MEMSUFFIX)(ptr, v);
+}
+#endif /* DATA_SIZE == 8 */
+
+#if DATA_SIZE == 4
+static inline float32 glue(ldflr, MEMSUFFIX)(target_ulong ptr)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.i = glue(ldulr, MEMSUFFIX)(ptr);
+    return u.f;
+}
+
+static inline float32 glue(glue(ldflr, ESUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+    return glue(ldflr, MEMSUFFIX)(ptr);
+}
+
+static inline void glue(stflr, MEMSUFFIX)(target_ulong ptr, float32 v)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+    u.f = v;
+    glue(stlr, MEMSUFFIX)(ptr, u.i);
+}
+
+static inline void glue(glue(stflr, ESUFFIX), MEMSUFFIX)(target_ulong ptr,
+                                                         float32 v)
+{
+    glue(stflr, MEMSUFFIX)(ptr, v);
+}
+#endif /* DATA_SIZE == 4 */
+
+#endif /* defined(REVERSE_ENDIAN) */
+
 #endif /* ACCESS_TYPE != (NB_MMU_MODES + 1) */
 
 #undef RES_TYPE
@@ -346,7 +518,10 @@
 #undef DATA_STYPE
 #undef SUFFIX
 #undef USUFFIX
+#undef LSUFFIX
+#undef LUSUFFIX
 #undef DATA_SIZE
 #undef CPU_MMU_INDEX
 #undef MMUSUFFIX
+#undef ESUFFIX
 #undef ADDR_READ
Index: softmmu_template.h
===================================================================
--- softmmu_template.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ softmmu_template.h	2007-10-15 15:47:17.000000000 +0000
@@ -19,25 +19,66 @@
  */
 #define DATA_SIZE (1 << SHIFT)
 
+#if !defined(REVERSE_ENDIAN)
+/* native-endian */
 #if DATA_SIZE == 8
 #define SUFFIX q
 #define USUFFIX q
+#define RSUFFIX qr
+#define URSUFFIX qr
 #define DATA_TYPE uint64_t
 #elif DATA_SIZE == 4
 #define SUFFIX l
-#define USUFFIX l
+#define USUFFIX ul
+#define RSUFFIX lr
+#define URSUFFIX ulr
 #define DATA_TYPE uint32_t
 #elif DATA_SIZE == 2
 #define SUFFIX w
 #define USUFFIX uw
+#define RSUFFIX wr
+#define URSUFFIX uwr
 #define DATA_TYPE uint16_t
 #elif DATA_SIZE == 1
 #define SUFFIX b
 #define USUFFIX ub
+#define RSUFFIX br
+#define URSUFFIX ubr
 #define DATA_TYPE uint8_t
 #else
 #error unsupported data size
 #endif
+#else /* defined(REVERSE_ENDIAN) */
+/* reverse-endian */
+#if DATA_SIZE == 8
+#define SUFFIX qr
+#define USUFFIX qr
+#define RSUFFIX q
+#define URSUFFIX q
+#define DATA_TYPE uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX lr
+#define USUFFIX ulr
+#define RSUFFIX l
+#define URSUFFIX ul
+#define DATA_TYPE uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX wr
+#define USUFFIX uwr
+#define RSUFFIX w
+#define URSUFFIX uw
+#define DATA_TYPE uint16_t
+#elif DATA_SIZE == 1
+#define SUFFIX br
+#define USUFFIX ubr
+#define RSUFFIX b
+#define URSUFFIX ub
+#define DATA_TYPE uint8_t
+#else
+#error unsupported data size
+#endif
+#endif /* defined(REVERSE_ENDIAN) */
+
 
 #ifdef SOFTMMU_CODE_ACCESS
 #define READ_ACCESS_TYPE 2
@@ -47,25 +88,64 @@
 #define ADDR_READ addr_read
 #endif
 
+#if (defined(TARGET_WORDS_BIGENDIAN) && !defined(REVERSE_ENDIAN)) || \
+    (!defined(TARGET_WORDS_BIGENDIAN) && defined(REVERSE_ENDIAN))
+#define ACCESS_WORDS_BIGENDIAN
+#endif
+
+/* Beware: we do not have reverse-endian accessors for IOs.
+ * Swap disabled (was "#if defined(REVERSE_ENDIAN)"): IO returns host values */
+#if 0
+#define DO_IOSWAP 1
+#if SHIFT == 1
+#define IOSWAP(val) bswap16(val)
+#elif SHIFT >= 2
+#define IOSWAP(val) bswap32(val)
+#else
+#define IOSWAP(val) (val)
+#endif
+#else
+#define DO_IOSWAP 0
+#define IOSWAP(val) (val)
+#endif
+
 static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
                                                         int mmu_idx,
                                                         void *retaddr);
 static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
-                                              target_ulong tlb_addr)
+                                              target_ulong tlb_addr,
+                                              int do_ioswap)
 {
     DATA_TYPE res;
+#if SHIFT > 2
+    uint32_t tmp;
+#endif
     int index;
 
     index = (tlb_addr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
 #if SHIFT <= 2
     res = io_mem_read[index][SHIFT](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        res = IOSWAP(res);
 #else
-#ifdef TARGET_WORDS_BIGENDIAN
-    res = (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr) << 32;
-    res |= io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+#ifdef ACCESS_WORDS_BIGENDIAN
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res = (uint64_t)tmp << 32;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res |= tmp;
 #else
-    res = io_mem_read[index][2](io_mem_opaque[index], physaddr);
-    res |= (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr + 4) << 32;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res = tmp;
+    tmp = io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    res |= (uint64_t)tmp << 32;
 #endif
 #endif /* SHIFT > 2 */
 #ifdef USE_KQEMU
@@ -92,10 +172,34 @@
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[mmu_idx][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO read */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 1);
+                } else {
+                    /* Specific case for reverse endian page read */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages or IO) */
+                        goto do_unaligned_access;
+                    }
+#ifdef ALIGNED_ONLY
+                    if ((addr & (DATA_SIZE - 1)) != 0) {
+                        retaddr = GETPC();
+                        do_unaligned_access(addr, READ_ACCESS_TYPE,
+                                            mmu_idx, retaddr);
+                    }
+#endif
+                    res = glue(glue(ld, URSUFFIX), _raw)((uint8_t *)(long)physaddr);
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
             /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
@@ -144,10 +248,45 @@
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[mmu_idx][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            res = glue(io_read, SUFFIX)(physaddr, tlb_addr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO read */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 1);
+                } else {
+                    /* Specific case for reverse endian page read */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages) */
+                        addr1 = addr & ~(DATA_SIZE - 1);
+                        addr2 = addr1 + DATA_SIZE;
+                        res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr1,
+                                                                      mmu_idx,
+                                                                      retaddr);
+                        res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
+                                                                      mmu_idx,
+                                                                      retaddr);
+                        shift = (addr & (DATA_SIZE - 1)) * 8;
+#ifdef ACCESS_WORDS_BIGENDIAN
+                        res = (res1 >> shift) |
+                            (res2 << ((DATA_SIZE * 8) - shift));
+#else
+                        res = (res1 << shift) |
+                            (res2 >> ((DATA_SIZE * 8) - shift));
+#endif
+                        res = (DATA_TYPE)res;
+                    } else {
+                        /* unaligned/aligned access in the same page */
+                        res = glue(glue(ld, URSUFFIX), _raw)((uint8_t *)(long)physaddr);
+                    }
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                res = glue(io_read, SUFFIX)(physaddr, tlb_addr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* slow unaligned access (it spans two pages) */
@@ -158,7 +297,7 @@
             res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
                                                           mmu_idx, retaddr);
             shift = (addr & (DATA_SIZE - 1)) * 8;
-#ifdef TARGET_WORDS_BIGENDIAN
+#ifdef ACCESS_WORDS_BIGENDIAN
             res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
 #else
             res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift));
@@ -186,22 +325,39 @@
 static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr,
                                           DATA_TYPE val,
                                           target_ulong tlb_addr,
-                                          void *retaddr)
+                                          void *retaddr, int do_ioswap)
 {
+#if SHIFT > 2
+    uint32_t tmp;
+#endif
     int index;
 
     index = (tlb_addr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
     env->mem_write_vaddr = tlb_addr;
     env->mem_write_pc = (unsigned long)retaddr;
 #if SHIFT <= 2
+    if (do_ioswap != DO_IOSWAP)
+        val = IOSWAP(val);
     io_mem_write[index][SHIFT](io_mem_opaque[index], physaddr, val);
 #else
-#ifdef TARGET_WORDS_BIGENDIAN
-    io_mem_write[index][2](io_mem_opaque[index], physaddr, val >> 32);
-    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val);
+#ifdef ACCESS_WORDS_BIGENDIAN
+    tmp = val >> 32;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr, tmp);
+    tmp = val;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, tmp);
 #else
-    io_mem_write[index][2](io_mem_opaque[index], physaddr, val);
-    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val >> 32);
+    tmp = val;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr, tmp);
+    tmp = val >> 32;
+    if (do_ioswap != DO_IOSWAP)
+        tmp = IOSWAP(tmp);
+    io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, tmp);
 #endif
 #endif /* SHIFT > 2 */
 #ifdef USE_KQEMU
@@ -224,12 +380,37 @@
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[mmu_idx][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            retaddr = GETPC();
-            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO write */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    retaddr = GETPC();
+                    glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr,
+                                           1);
+                } else {
+                    /* Specific case for reverse endian page write */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        goto do_unaligned_access;
+                    }
+#ifdef ALIGNED_ONLY
+                    if ((addr & (DATA_SIZE - 1)) != 0) {
+                        retaddr = GETPC();
+                        do_unaligned_access(addr, 1, mmu_idx, retaddr);
+                    }
+#endif
+                    glue(glue(st, RSUFFIX), _raw)((uint8_t *)(long)physaddr, val);
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                retaddr = GETPC();
+                glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
+            /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
@@ -275,15 +456,48 @@
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         physaddr = addr + env->tlb_table[mmu_idx][index].addend;
         if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr);
+            if (tlb_addr & IO_MEM_REVERSE) {
+                if (tlb_addr & ~(TARGET_PAGE_MASK | IO_MEM_REVERSE)) {
+                    /* Specific case for reverse endian IO write */
+                    if ((addr & (DATA_SIZE - 1)) != 0)
+                        goto do_unaligned_access;
+                    glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr,
+                                           1);
+                } else {
+                    /* Specific case for reverse endian page write */
+                    if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >=
+                        TARGET_PAGE_SIZE) {
+                        /* slow unaligned access (it spans two pages or IO) */
+                        /* XXX: not efficient, but simple */
+                        for(i = 0;i < DATA_SIZE; i++) {
+#ifdef ACCESS_WORDS_BIGENDIAN
+                            glue(slow_stb, MMUSUFFIX)(addr + i, val >> (i * 8),
+                                                      mmu_idx, retaddr);
+#else
+                            glue(slow_stb, MMUSUFFIX)(addr + i,
+                                                      val >> (((DATA_SIZE - 1) * 8)
+                                                              - (i * 8)),
+                                                      mmu_idx, retaddr);
+#endif
+                        }
+
+                    } else {
+                        /* aligned/unaligned access in the same page */
+                        glue(glue(st, RSUFFIX), _raw)((uint8_t *)(long)physaddr,
+                                                      val);
+                    }
+                }
+            } else {
+                /* IO access */
+                if ((addr & (DATA_SIZE - 1)) != 0)
+                    goto do_unaligned_access;
+                glue(io_write, SUFFIX)(physaddr, val, tlb_addr, retaddr, 0);
+            }
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* XXX: not efficient, but simple */
             for(i = 0;i < DATA_SIZE; i++) {
-#ifdef TARGET_WORDS_BIGENDIAN
+#ifdef ACCESS_WORDS_BIGENDIAN
                 glue(slow_stb, MMUSUFFIX)(addr + i, val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
                                           mmu_idx, retaddr);
 #else
@@ -304,10 +518,15 @@
 
 #endif /* !defined(SOFTMMU_CODE_ACCESS) */
 
+#undef DO_IOSWAP
+#undef IOSWAP
+#undef ACCESS_WORDS_BIGENDIAN
 #undef READ_ACCESS_TYPE
 #undef SHIFT
 #undef DATA_TYPE
 #undef SUFFIX
 #undef USUFFIX
+#undef RSUFFIX
+#undef URSUFFIX
 #undef DATA_SIZE
 #undef ADDR_READ
Index: hw/eepro100.c
===================================================================
--- hw/eepro100.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ hw/eepro100.c	2007-10-15 15:41:10.000000000 +0000
@@ -723,7 +723,7 @@
             uint32_t tbd_address = cb_address + 0x10;
             assert(tcb_bytes <= sizeof(buf));
             while (size < tcb_bytes) {
-                uint32_t tx_buffer_address = ldl_phys(tbd_address);
+                uint32_t tx_buffer_address = ldul_phys(tbd_address);
                 uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
                 //~ uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
                 tbd_address += 8;
@@ -743,7 +743,7 @@
                     /* Extended TCB. */
                     assert(tcb_bytes == 0);
                     for (; tbd_count < 2; tbd_count++) {
-                        uint32_t tx_buffer_address = ldl_phys(tbd_address);
+                        uint32_t tx_buffer_address = ldul_phys(tbd_address);
                         uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
                         uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
                         tbd_address += 8;
@@ -760,7 +760,7 @@
                 }
                 tbd_address = tbd_array;
                 for (; tbd_count < tx.tbd_count; tbd_count++) {
-                    uint32_t tx_buffer_address = ldl_phys(tbd_address);
+                    uint32_t tx_buffer_address = ldul_phys(tbd_address);
                     uint16_t tx_buffer_size = lduw_phys(tbd_address + 4);
                     uint16_t tx_buffer_el = lduw_phys(tbd_address + 6);
                     tbd_address += 8;
Index: hw/pc.c
===================================================================
--- hw/pc.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ hw/pc.c	2007-10-15 15:41:10.000000000 +0000
@@ -477,8 +477,8 @@
     }
 
     /* kernel protocol version */
-    fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202));
-    if (ldl_p(header+0x202) == 0x53726448)
+    fprintf(stderr, "header magic: %#x\n", ldul_p(header+0x202));
+    if (ldul_p(header+0x202) == 0x53726448)
 	protocol = lduw_p(header+0x206);
     else
 	protocol = 0;
@@ -510,7 +510,7 @@
 
     /* highest address for loading the initrd */
     if (protocol >= 0x203)
-	initrd_max = ldl_p(header+0x22c);
+	initrd_max = ldul_p(header+0x22c);
     else
 	initrd_max = 0x37ffffff;
 
Index: hw/pl080.c
===================================================================
--- hw/pl080.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ hw/pl080.c	2007-10-15 15:41:10.000000000 +0000
@@ -162,10 +162,10 @@
             if (size == 0) {
                 /* Transfer complete.  */
                 if (ch->lli) {
-                    ch->src = ldl_phys(ch->lli);
-                    ch->dest = ldl_phys(ch->lli + 4);
-                    ch->ctrl = ldl_phys(ch->lli + 12);
-                    ch->lli = ldl_phys(ch->lli + 8);
+                    ch->src = ldul_phys(ch->lli);
+                    ch->dest = ldul_phys(ch->lli + 4);
+                    ch->ctrl = ldul_phys(ch->lli + 12);
+                    ch->lli = ldul_phys(ch->lli + 8);
                 } else {
                     ch->conf &= ~PL080_CCONF_E;
                 }
Index: hw/sun4m.c
===================================================================
--- hw/sun4m.c.orig	2007-10-14 16:27:39.000000000 +0000
+++ hw/sun4m.c	2007-10-15 15:41:10.000000000 +0000
@@ -465,7 +465,7 @@
         }
         if (initrd_size > 0) {
             for (i = 0; i < 64 * TARGET_PAGE_SIZE; i += TARGET_PAGE_SIZE) {
-                if (ldl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
+                if (ldul_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
                     == 0x48647253) { // HdrS
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 16, INITRD_LOAD_ADDR);
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 20, initrd_size);
Index: hw/sun4u.c
===================================================================
--- hw/sun4u.c.orig	2007-10-14 16:27:39.000000000 +0000
+++ hw/sun4u.c	2007-10-15 15:41:10.000000000 +0000
@@ -418,7 +418,7 @@
         }
         if (initrd_size > 0) {
             for (i = 0; i < 64 * TARGET_PAGE_SIZE; i += TARGET_PAGE_SIZE) {
-                if (ldl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
+                if (ldul_raw(phys_ram_base + KERNEL_LOAD_ADDR + i)
                     == 0x48647253) { // HdrS
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 16, INITRD_LOAD_ADDR);
                     stl_raw(phys_ram_base + KERNEL_LOAD_ADDR + i + 20, initrd_size);
Index: linux-user/elfload.c
===================================================================
--- linux-user/elfload.c.orig	2007-10-14 16:22:00.000000000 +0000
+++ linux-user/elfload.c	2007-10-15 15:41:58.000000000 +0000
@@ -346,7 +346,7 @@
     pos += sizeof(abi_ulong);
     _regs->gpr[4] = pos;
     for (tmp = 1; tmp != 0; pos += sizeof(abi_ulong))
-        tmp = ldl(pos);
+        tmp = ldul(pos);
     _regs->gpr[5] = pos;
 }
 
Index: linux-user/qemu.h
===================================================================
--- linux-user/qemu.h.orig	2007-10-14 16:22:00.000000000 +0000
+++ linux-user/qemu.h	2007-10-15 15:41:10.000000000 +0000
@@ -323,7 +323,7 @@
 #define tput8(addr, val) stb(addr, val)
 #define tget16(addr) lduw(addr)
 #define tput16(addr, val) stw(addr, val)
-#define tget32(addr) ldl(addr)
+#define tget32(addr) ldul(addr)
 #define tput32(addr, val) stl(addr, val)
 #define tget64(addr) ldq(addr)
 #define tput64(addr, val) stq(addr, val)
@@ -331,7 +331,7 @@
 #define tgetl(addr) ldq(addr)
 #define tputl(addr, val) stq(addr, val)
 #else
-#define tgetl(addr) ldl(addr)
+#define tgetl(addr) ldul(addr)
 #define tputl(addr, val) stl(addr, val)
 #endif
 
Index: linux-user/signal.c
===================================================================
--- linux-user/signal.c.orig	2007-10-14 16:22:00.000000000 +0000
+++ linux-user/signal.c	2007-10-15 15:41:10.000000000 +0000
@@ -878,28 +878,28 @@
         cpu_x86_load_seg(env, R_ES, lduw(&sc->es));
         cpu_x86_load_seg(env, R_DS, lduw(&sc->ds));
 
-        env->regs[R_EDI] = ldl(&sc->edi);
-        env->regs[R_ESI] = ldl(&sc->esi);
-        env->regs[R_EBP] = ldl(&sc->ebp);
-        env->regs[R_ESP] = ldl(&sc->esp);
-        env->regs[R_EBX] = ldl(&sc->ebx);
-        env->regs[R_EDX] = ldl(&sc->edx);
-        env->regs[R_ECX] = ldl(&sc->ecx);
-        env->eip = ldl(&sc->eip);
+        env->regs[R_EDI] = ldul(&sc->edi);
+        env->regs[R_ESI] = ldul(&sc->esi);
+        env->regs[R_EBP] = ldul(&sc->ebp);
+        env->regs[R_ESP] = ldul(&sc->esp);
+        env->regs[R_EBX] = ldul(&sc->ebx);
+        env->regs[R_EDX] = ldul(&sc->edx);
+        env->regs[R_ECX] = ldul(&sc->ecx);
+        env->eip = ldul(&sc->eip);
 
         cpu_x86_load_seg(env, R_CS, lduw(&sc->cs) | 3);
         cpu_x86_load_seg(env, R_SS, lduw(&sc->ss) | 3);
 
 	{
 		unsigned int tmpflags;
-                tmpflags = ldl(&sc->eflags);
+                tmpflags = ldul(&sc->eflags);
 		env->eflags = (env->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
                 //		regs->orig_eax = -1;		/* disable syscall checks */
 	}
 
 	{
 		struct _fpstate * buf;
-                buf = (void *)ldl(&sc->fpstate);
+                buf = (void *)ldul(&sc->fpstate);
 		if (buf) {
 #if 0
 			if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
@@ -909,7 +909,7 @@
 		}
 	}
 
-        *peax = ldl(&sc->eax);
+        *peax = ldul(&sc->eax);
 	return err;
 #if 0
 badframe:
Index: linux-user/vm86.c
===================================================================
--- linux-user/vm86.c.orig	2007-10-14 16:22:00.000000000 +0000
+++ linux-user/vm86.c	2007-10-15 15:41:10.000000000 +0000
@@ -56,7 +56,7 @@
 
 static inline unsigned int vm_getl(uint8_t *segptr, unsigned int reg16)
 {
-    return ldl(segptr + (reg16 & 0xffff));
+    return ldul(segptr + (reg16 & 0xffff));
 }
 
 void save_v86_state(CPUX86State *env)
Index: target-alpha/exec.h
===================================================================
--- target-alpha/exec.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-alpha/exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -62,6 +62,9 @@
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 static inline void env_to_regs(void)
Index: target-alpha/helper.c
===================================================================
--- target-alpha/helper.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-alpha/helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -69,7 +69,7 @@
             env->exception_index = EXCP_DTB_MISS_PAL;
         else
             env->exception_index = EXCP_DTB_MISS_NATIVE;
-        opc = (ldl_code(env->pc) >> 21) << 4;
+        opc = (ldul_code(env->pc) >> 21) << 4;
         if (rw) {
             opc |= 0x9;
         } else {
Index: target-alpha/op_helper.c
===================================================================
--- target-alpha/op_helper.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-alpha/op_helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -1213,6 +1213,7 @@
 
 #define MMUSUFFIX _mmu
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -1225,6 +1226,21 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
Index: target-alpha/op_mem.h
===================================================================
--- target-alpha/op_mem.h.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-alpha/op_mem.h	2007-10-15 15:41:10.000000000 +0000
@@ -30,7 +30,7 @@
 {
     env->lock = EA;
 
-    return glue(ldl, MEMSUFFIX)(EA);
+    return glue(ldul, MEMSUFFIX)(EA);
 }
 
 static inline uint32_t glue(ldq_l, MEMSUFFIX) (target_ulong EA)
@@ -82,7 +82,7 @@
 ALPHA_ST_OP(b, stb);
 ALPHA_LD_OP(wu, lduw);
 ALPHA_ST_OP(w, stw);
-ALPHA_LD_OP(l, ldl);
+ALPHA_LD_OP(l, ldul);
 ALPHA_ST_OP(l, stl);
 ALPHA_LD_OP(q, ldq);
 ALPHA_ST_OP(q, stq);
Index: target-alpha/translate.c
===================================================================
--- target-alpha/translate.c.orig	2007-10-14 16:44:20.000000000 +0000
+++ target-alpha/translate.c	2007-10-15 15:41:10.000000000 +0000
@@ -2010,7 +2010,7 @@
                     ctx.pc, ctx.mem_idx);
         }
 #endif
-        insn = ldl_code(ctx.pc);
+        insn = ldul_code(ctx.pc);
 #if defined ALPHA_DEBUG_DISAS
         insn_count++;
         if (logfile != NULL) {
Index: target-arm/exec.h
===================================================================
--- target-arm/exec.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-arm/exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -64,6 +64,9 @@
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 /* In op_helper.c */
Index: target-arm/helper.c
===================================================================
--- target-arm/helper.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-arm/helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -297,7 +297,7 @@
             if (env->thumb) {
                 mask = lduw_code(env->regs[15] - 2) & 0xff;
             } else {
-                mask = ldl_code(env->regs[15] - 4) & 0xffffff;
+                mask = ldul_code(env->regs[15] - 4) & 0xffffff;
             }
             /* Only intercept calls from privileged modes, to provide some
                semblance of security.  */
@@ -473,7 +473,7 @@
         /* Pagetable walk.  */
         /* Lookup l1 descriptor.  */
         table = (env->cp15.c2_base & 0xffffc000) | ((address >> 18) & 0x3ffc);
-        desc = ldl_phys(table);
+        desc = ldul_phys(table);
         type = (desc & 3);
         domain = (env->cp15.c3 >> ((desc >> 4) & 0x1e)) & 3;
         if (type == 0) {
@@ -502,7 +502,7 @@
                 /* Fine pagetable.  */
                 table = (desc & 0xfffff000) | ((address >> 8) & 0xffc);
             }
-            desc = ldl_phys(table);
+            desc = ldul_phys(table);
             switch (desc & 3) {
             case 0: /* Page translation fault.  */
                 code = 7;
Index: target-arm/op_helper.c
===================================================================
--- target-arm/op_helper.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-arm/op_helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -180,6 +180,7 @@
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -192,6 +193,21 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
Index: target-arm/op_mem.h
===================================================================
--- target-arm/op_mem.h.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-arm/op_mem.h	2007-10-15 15:41:10.000000000 +0000
@@ -1,18 +1,17 @@
 /* ARM memory operations.  */
 
-/* Load from address T1 into T0.  */
-#define MEM_LD_OP(name) \
+#define MEM_LD_OP(name, lname) \
 void OPPROTO glue(op_ld##name,MEMSUFFIX)(void) \
 { \
-    T0 = glue(ld##name,MEMSUFFIX)(T1); \
+    T0 = glue(ld##lname,MEMSUFFIX)(T1); \
     FORCE_RET(); \
 }
 
-MEM_LD_OP(ub)
-MEM_LD_OP(sb)
-MEM_LD_OP(uw)
-MEM_LD_OP(sw)
-MEM_LD_OP(l)
+MEM_LD_OP(ub,ub)
+MEM_LD_OP(sb,sb)
+MEM_LD_OP(uw,uw)
+MEM_LD_OP(sw,sw)
+MEM_LD_OP(l,ul)
 
 #undef MEM_LD_OP
 
@@ -45,7 +44,7 @@
 }
 
 MEM_SWP_OP(b, ub)
-MEM_SWP_OP(l, l)
+MEM_SWP_OP(l, ul)
 
 #undef MEM_SWP_OP
 
@@ -82,7 +81,7 @@
 
 MMX_MEM_OP(b, ub)
 MMX_MEM_OP(w, uw)
-MMX_MEM_OP(l, l)
+MMX_MEM_OP(l, ul)
 MMX_MEM_OP(q, q)
 
 #undef MMX_MEM_OP
Index: target-arm/translate.c
===================================================================
--- target-arm/translate.c.orig	2007-10-14 16:44:20.000000000 +0000
+++ target-arm/translate.c	2007-10-15 15:41:10.000000000 +0000
@@ -2206,7 +2206,7 @@
 {
     unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
 
-    insn = ldl_code(s->pc);
+    insn = ldul_code(s->pc);
     s->pc += 4;
 
     cond = insn >> 28;
Index: target-cris/exec.h
===================================================================
--- target-cris/exec.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-cris/exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -50,6 +50,9 @@
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 void cpu_cris_flush_flags(CPUCRISState *env, int cc_op);
Index: target-cris/helper.c
===================================================================
--- target-cris/helper.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-cris/helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -106,7 +106,7 @@
 //			printf ("BREAK! %d\n", env->trapnr);
 			irqnum = env->trapnr;
 			ebp = env->pregs[SR_EBP];
-			isr = ldl_code(ebp + irqnum * 4);
+			isr = ldul_code(ebp + irqnum * 4);
 			env->pregs[SR_ERP] = env->pc + 2;
 			env->pc = isr;
 
@@ -117,7 +117,7 @@
 //			printf ("MMU miss\n");
 			irqnum = 4;
 			ebp = env->pregs[SR_EBP];
-			isr = ldl_code(ebp + irqnum * 4);
+			isr = ldul_code(ebp + irqnum * 4);
 			env->pregs[SR_ERP] = env->pc;
 			env->pc = isr;
 			cris_shift_ccs(env);
@@ -138,7 +138,7 @@
 					__builtin_clz(env->pending_interrupts);
 				irqnum += 0x30;
 				ebp = env->pregs[SR_EBP];
-				isr = ldl_code(ebp + irqnum * 4);
+				isr = ldul_code(ebp + irqnum * 4);
 				env->pregs[SR_ERP] = env->pc;
 				env->pc = isr;
 
Index: target-cris/op_helper.c
===================================================================
--- target-cris/op_helper.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-cris/op_helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -25,6 +25,7 @@
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -37,6 +38,21 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
Index: target-cris/op_mem.c
===================================================================
--- target-cris/op_mem.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-cris/op_mem.c	2007-10-15 15:41:10.000000000 +0000
@@ -49,7 +49,7 @@
 }
 
 void glue(op_ldl_T0_T0, MEMSUFFIX) (void) {
-    T0 = glue(ldl, MEMSUFFIX) (T0);
+    T0 = glue(ldul, MEMSUFFIX) (T0);
     RETURN();
 }
 
Index: target-cris/translate.c
===================================================================
--- target-cris/translate.c.orig	2007-10-14 16:44:20.000000000 +0000
+++ target-cris/translate.c	2007-10-15 15:41:10.000000000 +0000
@@ -828,7 +828,7 @@
 		if (memsize == 1)
 			insn_len++;
 
-		imm = ldl_code(dc->pc + 2);
+		imm = ldul_code(dc->pc + 2);
 		if (memsize != 4) {
 			if (s_ext) {
 				imm = sign_extend(imm, (memsize * 8) - 1);
@@ -1962,7 +1962,7 @@
 	rd = dc->op2;
 
 	cris_cc_mask(dc, 0);
-	imm = ldl_code(dc->pc + 2);
+	imm = ldul_code(dc->pc + 2);
 	DIS(fprintf (logfile, "lapc 0x%x, $r%u\n", imm + dc->pc, dc->op2));
 	gen_op_movl_T0_im (dc->pc + imm);
 	gen_movl_reg_T0[rd] ();
@@ -1999,7 +1999,7 @@
 {
 	uint32_t imm;
 
-	imm = ldl_code(dc->pc + 2);
+	imm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "jas 0x%x\n", imm));
 	cris_cc_mask(dc, 0);
@@ -2016,7 +2016,7 @@
 {
 	uint32_t imm;
 
-	imm = ldl_code(dc->pc + 2);
+	imm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "jasc 0x%x\n", imm));
 	cris_cc_mask(dc, 0);
@@ -2047,7 +2047,7 @@
 	int32_t offset;
 	uint32_t cond = dc->op2;
 
-	offset = ldl_code(dc->pc + 2);
+	offset = ldul_code(dc->pc + 2);
 	offset = sign_extend(offset, 15);
 
 	DIS(fprintf (logfile, "b%s %d pc=%x dst=%x\n",
@@ -2065,7 +2065,7 @@
 	int32_t simm;
 
 
-	simm = ldl_code(dc->pc + 2);
+	simm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "bas 0x%x, $p%u\n", dc->pc + simm, dc->op2));
 	cris_cc_mask(dc, 0);
@@ -2081,7 +2081,7 @@
 static unsigned int dec_basc_im(DisasContext *dc)
 {
 	int32_t simm;
-	simm = ldl_code(dc->pc + 2);
+	simm = ldul_code(dc->pc + 2);
 
 	DIS(fprintf (logfile, "basc 0x%x, $p%u\n", dc->pc + simm, dc->op2));
 	cris_cc_mask(dc, 0);
@@ -2259,7 +2259,7 @@
 	int i;
 
 	/* Load a halfword onto the instruction register.  */
-	tmp = ldl_code(dc->pc);
+	tmp = ldul_code(dc->pc);
 	dc->ir = tmp & 0xffff;
 
 	/* Now decode it.  */
Index: target-i386/exec.h
===================================================================
--- target-i386/exec.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-i386/exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -217,6 +217,9 @@
 #if !defined(CONFIG_USER_ONLY)
 
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 
 static inline double ldfq(target_ulong ptr)
 {
@@ -244,7 +247,7 @@
         float f;
         uint32_t i;
     } u;
-    u.i = ldl(ptr);
+    u.i = ldul(ptr);
     return u.f;
 }
 
@@ -419,12 +422,12 @@
 
 static inline CPU86_LDouble helper_fldt(target_ulong ptr)
 {
-    return *(CPU86_LDouble *)ptr;
+    return *(CPU86_LDouble *)(unsigned long)ptr;
 }
 
 static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr)
 {
-    *(CPU86_LDouble *)ptr = f;
+    *(CPU86_LDouble *)(unsigned long)ptr = f;
 }
 
 #else
Index: target-i386/helper.c
===================================================================
--- target-i386/helper.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-i386/helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -122,8 +122,8 @@
     if ((index + 7) > dt->limit)
         return -1;
     ptr = dt->base + index;
-    *e1_ptr = ldl_kernel(ptr);
-    *e2_ptr = ldl_kernel(ptr + 4);
+    *e1_ptr = ldul_kernel(ptr);
+    *e2_ptr = ldul_kernel(ptr + 4);
     return 0;
 }
 
@@ -186,7 +186,7 @@
         *esp_ptr = lduw_kernel(env->tr.base + index);
         *ss_ptr = lduw_kernel(env->tr.base + index + 2);
     } else {
-        *esp_ptr = ldl_kernel(env->tr.base + index);
+        *esp_ptr = ldul_kernel(env->tr.base + index);
         *ss_ptr = lduw_kernel(env->tr.base + index + 4);
     }
 }
@@ -302,15 +302,15 @@
     /* read all the registers from the new TSS */
     if (type & 8) {
         /* 32 bit */
-        new_cr3 = ldl_kernel(tss_base + 0x1c);
-        new_eip = ldl_kernel(tss_base + 0x20);
-        new_eflags = ldl_kernel(tss_base + 0x24);
+        new_cr3 = ldul_kernel(tss_base + 0x1c);
+        new_eip = ldul_kernel(tss_base + 0x20);
+        new_eflags = ldul_kernel(tss_base + 0x24);
         for(i = 0; i < 8; i++)
-            new_regs[i] = ldl_kernel(tss_base + (0x28 + i * 4));
+            new_regs[i] = ldul_kernel(tss_base + (0x28 + i * 4));
         for(i = 0; i < 6; i++)
             new_segs[i] = lduw_kernel(tss_base + (0x48 + i * 4));
         new_ldt = lduw_kernel(tss_base + 0x60);
-        new_trap = ldl_kernel(tss_base + 0x64);
+        new_trap = ldul_kernel(tss_base + 0x64);
     } else {
         /* 16 bit */
         new_cr3 = 0;
@@ -341,7 +341,7 @@
         target_ulong ptr;
         uint32_t e2;
         ptr = env->gdt.base + (env->tr.selector & ~7);
-        e2 = ldl_kernel(ptr + 4);
+        e2 = ldul_kernel(ptr + 4);
         e2 &= ~DESC_TSS_BUSY_MASK;
         stl_kernel(ptr + 4, e2);
     }
@@ -393,7 +393,7 @@
         target_ulong ptr;
         uint32_t e2;
         ptr = env->gdt.base + (tss_selector & ~7);
-        e2 = ldl_kernel(ptr + 4);
+        e2 = ldul_kernel(ptr + 4);
         e2 |= DESC_TSS_BUSY_MASK;
         stl_kernel(ptr + 4, e2);
     }
@@ -456,8 +456,8 @@
         if ((index + 7) > dt->limit)
             raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
         if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
             raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
         if (!(e2 & DESC_P_MASK))
@@ -580,7 +580,7 @@
 
 #define POPL(ssp, sp, sp_mask, val)\
 {\
-    val = (uint32_t)ldl_kernel((ssp) + (sp & (sp_mask)));\
+    val = (uint32_t)ldul_kernel((ssp) + (sp & (sp_mask)));\
     sp += 4;\
 }
 
@@ -629,8 +629,8 @@
     if (intno * 8 + 7 > dt->limit)
         raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
     ptr = dt->base + intno * 8;
-    e1 = ldl_kernel(ptr);
-    e2 = ldl_kernel(ptr + 4);
+    e1 = ldul_kernel(ptr);
+    e2 = ldul_kernel(ptr + 4);
     /* check gate type */
     type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
     switch(type) {
@@ -875,9 +875,9 @@
     if (intno * 16 + 15 > dt->limit)
         raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
     ptr = dt->base + intno * 16;
-    e1 = ldl_kernel(ptr);
-    e2 = ldl_kernel(ptr + 4);
-    e3 = ldl_kernel(ptr + 8);
+    e1 = ldul_kernel(ptr);
+    e2 = ldul_kernel(ptr + 4);
+    e3 = ldul_kernel(ptr + 8);
     /* check gate type */
     type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
     switch(type) {
@@ -1147,7 +1147,7 @@
 
     dt = &env->idt;
     ptr = dt->base + (intno * 8);
-    e2 = ldl_kernel(ptr + 4);
+    e2 = ldul_kernel(ptr + 4);
 
     dpl = (e2 >> DESC_DPL_SHIFT) & 3;
     cpl = env->hflags & HF_CPL_MASK;
@@ -1469,24 +1469,24 @@
         cpu_x86_load_seg_cache(env, i,
                                lduw_phys(sm_state + offset),
                                ldq_phys(sm_state + offset + 8),
-                               ldl_phys(sm_state + offset + 4),
+                               ldul_phys(sm_state + offset + 4),
                                (lduw_phys(sm_state + offset + 2) & 0xf0ff) << 8);
     }
 
     env->gdt.base = ldq_phys(sm_state + 0x7e68);
-    env->gdt.limit = ldl_phys(sm_state + 0x7e64);
+    env->gdt.limit = ldul_phys(sm_state + 0x7e64);
 
     env->ldt.selector = lduw_phys(sm_state + 0x7e70);
     env->ldt.base = ldq_phys(sm_state + 0x7e78);
-    env->ldt.limit = ldl_phys(sm_state + 0x7e74);
+    env->ldt.limit = ldul_phys(sm_state + 0x7e74);
     env->ldt.flags = (lduw_phys(sm_state + 0x7e72) & 0xf0ff) << 8;
 
     env->idt.base = ldq_phys(sm_state + 0x7e88);
-    env->idt.limit = ldl_phys(sm_state + 0x7e84);
+    env->idt.limit = ldul_phys(sm_state + 0x7e84);
 
     env->tr.selector = lduw_phys(sm_state + 0x7e90);
     env->tr.base = ldq_phys(sm_state + 0x7e98);
-    env->tr.limit = ldl_phys(sm_state + 0x7e94);
+    env->tr.limit = ldul_phys(sm_state + 0x7e94);
     env->tr.flags = (lduw_phys(sm_state + 0x7e92) & 0xf0ff) << 8;
 
     EAX = ldq_phys(sm_state + 0x7ff8);
@@ -1500,51 +1500,51 @@
     for(i = 8; i < 16; i++)
         env->regs[i] = ldq_phys(sm_state + 0x7ff8 - i * 8);
     env->eip = ldq_phys(sm_state + 0x7f78);
-    load_eflags(ldl_phys(sm_state + 0x7f70),
+    load_eflags(ldul_phys(sm_state + 0x7f70),
                 ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    env->dr[6] = ldl_phys(sm_state + 0x7f68);
-    env->dr[7] = ldl_phys(sm_state + 0x7f60);
+    env->dr[6] = ldul_phys(sm_state + 0x7f68);
+    env->dr[7] = ldul_phys(sm_state + 0x7f60);
 
-    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f48));
-    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7f50));
-    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7f58));
+    cpu_x86_update_cr4(env, ldul_phys(sm_state + 0x7f48));
+    cpu_x86_update_cr3(env, ldul_phys(sm_state + 0x7f50));
+    cpu_x86_update_cr0(env, ldul_phys(sm_state + 0x7f58));
 
-    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+    val = ldul_phys(sm_state + 0x7efc); /* revision ID */
     if (val & 0x20000) {
-        env->smbase = ldl_phys(sm_state + 0x7f00) & ~0x7fff;
+        env->smbase = ldul_phys(sm_state + 0x7f00) & ~0x7fff;
     }
 #else
-    cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7ffc));
-    cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7ff8));
-    load_eflags(ldl_phys(sm_state + 0x7ff4),
+    cpu_x86_update_cr0(env, ldul_phys(sm_state + 0x7ffc));
+    cpu_x86_update_cr3(env, ldul_phys(sm_state + 0x7ff8));
+    load_eflags(ldul_phys(sm_state + 0x7ff4),
                 ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
-    env->eip = ldl_phys(sm_state + 0x7ff0);
-    EDI = ldl_phys(sm_state + 0x7fec);
-    ESI = ldl_phys(sm_state + 0x7fe8);
-    EBP = ldl_phys(sm_state + 0x7fe4);
-    ESP = ldl_phys(sm_state + 0x7fe0);
-    EBX = ldl_phys(sm_state + 0x7fdc);
-    EDX = ldl_phys(sm_state + 0x7fd8);
-    ECX = ldl_phys(sm_state + 0x7fd4);
-    EAX = ldl_phys(sm_state + 0x7fd0);
-    env->dr[6] = ldl_phys(sm_state + 0x7fcc);
-    env->dr[7] = ldl_phys(sm_state + 0x7fc8);
-
-    env->tr.selector = ldl_phys(sm_state + 0x7fc4) & 0xffff;
-    env->tr.base = ldl_phys(sm_state + 0x7f64);
-    env->tr.limit = ldl_phys(sm_state + 0x7f60);
-    env->tr.flags = (ldl_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
-
-    env->ldt.selector = ldl_phys(sm_state + 0x7fc0) & 0xffff;
-    env->ldt.base = ldl_phys(sm_state + 0x7f80);
-    env->ldt.limit = ldl_phys(sm_state + 0x7f7c);
-    env->ldt.flags = (ldl_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
+    env->eip = ldul_phys(sm_state + 0x7ff0);
+    EDI = ldul_phys(sm_state + 0x7fec);
+    ESI = ldul_phys(sm_state + 0x7fe8);
+    EBP = ldul_phys(sm_state + 0x7fe4);
+    ESP = ldul_phys(sm_state + 0x7fe0);
+    EBX = ldul_phys(sm_state + 0x7fdc);
+    EDX = ldul_phys(sm_state + 0x7fd8);
+    ECX = ldul_phys(sm_state + 0x7fd4);
+    EAX = ldul_phys(sm_state + 0x7fd0);
+    env->dr[6] = ldul_phys(sm_state + 0x7fcc);
+    env->dr[7] = ldul_phys(sm_state + 0x7fc8);
+
+    env->tr.selector = ldul_phys(sm_state + 0x7fc4) & 0xffff;
+    env->tr.base = ldul_phys(sm_state + 0x7f64);
+    env->tr.limit = ldul_phys(sm_state + 0x7f60);
+    env->tr.flags = (ldul_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
+
+    env->ldt.selector = ldul_phys(sm_state + 0x7fc0) & 0xffff;
+    env->ldt.base = ldul_phys(sm_state + 0x7f80);
+    env->ldt.limit = ldul_phys(sm_state + 0x7f7c);
+    env->ldt.flags = (ldul_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
 
-    env->gdt.base = ldl_phys(sm_state + 0x7f74);
-    env->gdt.limit = ldl_phys(sm_state + 0x7f70);
+    env->gdt.base = ldul_phys(sm_state + 0x7f74);
+    env->gdt.limit = ldul_phys(sm_state + 0x7f70);
 
-    env->idt.base = ldl_phys(sm_state + 0x7f58);
-    env->idt.limit = ldl_phys(sm_state + 0x7f54);
+    env->idt.base = ldul_phys(sm_state + 0x7f58);
+    env->idt.limit = ldul_phys(sm_state + 0x7f54);
 
     for(i = 0; i < 6; i++) {
         if (i < 3)
@@ -1552,16 +1552,16 @@
         else
             offset = 0x7f2c + (i - 3) * 12;
         cpu_x86_load_seg_cache(env, i,
-                               ldl_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
-                               ldl_phys(sm_state + offset + 8),
-                               ldl_phys(sm_state + offset + 4),
-                               (ldl_phys(sm_state + offset) & 0xf0ff) << 8);
+                               ldul_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
+                               ldul_phys(sm_state + offset + 8),
+                               ldul_phys(sm_state + offset + 4),
+                               (ldul_phys(sm_state + offset) & 0xf0ff) << 8);
     }
-    cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f14));
+    cpu_x86_update_cr4(env, ldul_phys(sm_state + 0x7f14));
 
-    val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+    val = ldul_phys(sm_state + 0x7efc); /* revision ID */
     if (val & 0x20000) {
-        env->smbase = ldl_phys(sm_state + 0x7ef8) & ~0x7fff;
+        env->smbase = ldul_phys(sm_state + 0x7ef8) & ~0x7fff;
     }
 #endif
     CC_OP = CC_OP_EFLAGS;
@@ -1761,7 +1761,7 @@
         while (--level) {
             esp -= 4;
             ebp -= 4;
-            stl(ssp + (esp & esp_mask), ldl(ssp + (ebp & esp_mask)));
+            stl(ssp + (esp & esp_mask), ldul(ssp + (ebp & esp_mask)));
         }
         esp -= 4;
         stl(ssp + (esp & esp_mask), T1);
@@ -1836,8 +1836,8 @@
         if ((index + entry_limit) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
         if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         if (!(e2 & DESC_P_MASK))
@@ -1845,7 +1845,7 @@
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
             uint32_t e3;
-            e3 = ldl_kernel(ptr + 8);
+            e3 = ldul_kernel(ptr + 8);
             load_seg_cache_raw_dt(&env->ldt, e1, e2);
             env->ldt.base |= (target_ulong)e3 << 32;
         } else
@@ -1885,8 +1885,8 @@
         if ((index + entry_limit) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
         type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
         if ((e2 & DESC_S_MASK) ||
             (type != 1 && type != 9))
@@ -1896,8 +1896,8 @@
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
             uint32_t e3, e4;
-            e3 = ldl_kernel(ptr + 8);
-            e4 = ldl_kernel(ptr + 12);
+            e3 = ldul_kernel(ptr + 8);
+            e4 = ldul_kernel(ptr + 12);
             if ((e4 >> DESC_TYPE_SHIFT) & 0xf)
                 raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
             load_seg_cache_raw_dt(&env->tr, e1, e2);
@@ -1943,8 +1943,8 @@
         if ((index + 7) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl_kernel(ptr);
-        e2 = ldl_kernel(ptr + 4);
+        e1 = ldul_kernel(ptr);
+        e2 = ldul_kernel(ptr + 4);
 
         if (!(e2 & DESC_S_MASK))
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
@@ -2273,7 +2273,7 @@
                 PUSHL(ssp, sp, sp_mask, env->segs[R_SS].selector);
                 PUSHL(ssp, sp, sp_mask, ESP);
                 for(i = param_count - 1; i >= 0; i--) {
-                    val = ldl_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask));
+                    val = ldul_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask));
                     PUSHL(ssp, sp, sp_mask, val);
                 }
             } else {
@@ -3569,8 +3569,8 @@
 
     if (env->cr[4] & CR4_OSFXSR_MASK) {
         /* XXX: finish it */
-        env->mxcsr = ldl(ptr + 0x18);
-        //ldl(ptr + 0x1c);
+        env->mxcsr = ldul(ptr + 0x18);
+        //ldul(ptr + 0x1c);
         nb_xmm_regs = 8 << data64;
         addr = ptr + 0xa0;
         for(i = 0; i < nb_xmm_regs; i++) {
@@ -3867,6 +3867,7 @@
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -3879,6 +3880,21 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 #endif
 
 /* try to fill the TLB and return an exception if error. If retaddr is
@@ -4010,13 +4026,13 @@
     env->intercept_cr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_write));
     env->intercept_dr_read    = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_read));
     env->intercept_dr_write   = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_write));
-    env->intercept_exceptions = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions));
+    env->intercept_exceptions = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions));
 
     env->gdt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base));
-    env->gdt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit));
+    env->gdt.limit = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit));
 
     env->idt.base  = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base));
-    env->idt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit));
+    env->idt.limit = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit));
 
     /* clear exit_info_2 so we behave like the real hardware */
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 0);
@@ -4025,7 +4041,7 @@
     cpu_x86_update_cr4(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4)));
     cpu_x86_update_cr3(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3)));
     env->cr[2] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2));
-    int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
+    int_ctl = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
     if (int_ctl & V_INTR_MASKING_MASK) {
         env->cr[8] = int_ctl & V_TPR_MASK;
         if (env->eflags & IF_MASK)
@@ -4073,11 +4089,11 @@
     regs_to_env();
 
     /* maybe we need to inject an event */
-    event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+    event_inj = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
     if (event_inj & SVM_EVTINJ_VALID) {
         uint8_t vector = event_inj & SVM_EVTINJ_VEC_MASK;
         uint16_t valid_err = event_inj & SVM_EVTINJ_VALID_ERR;
-        uint32_t event_inj_err = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
+        uint32_t event_inj_err = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
         stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
 
         if (loglevel & CPU_LOG_TB_IN_ASM)
@@ -4309,7 +4325,7 @@
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3), env->cr[3]);
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4), env->cr[4]);
 
-    if ((int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl))) & V_INTR_MASKING_MASK) {
+    if ((int_ctl = ldul_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl))) & V_INTR_MASKING_MASK) {
         int_ctl &= ~V_TPR_MASK;
         int_ctl |= env->cr[8] & V_TPR_MASK;
         stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl), int_ctl);
@@ -4330,10 +4346,10 @@
     env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
 
     env->gdt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base));
-    env->gdt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit));
+    env->gdt.limit = ldul_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit));
 
     env->idt.base  = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base));
-    env->idt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit));
+    env->idt.limit = ldul_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit));
 
     cpu_x86_update_cr0(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0)) | CR0_PE_MASK);
     cpu_x86_update_cr4(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4)));
Index: target-i386/helper2.c
===================================================================
--- target-i386/helper2.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-i386/helper2.c	2007-10-15 15:41:10.000000000 +0000
@@ -772,7 +772,7 @@
         /* page directory entry */
         pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) &
             env->a20_mask;
-        pde = ldl_phys(pde_addr);
+        pde = ldul_phys(pde_addr);
         if (!(pde & PG_PRESENT_MASK)) {
             error_code = 0;
             goto do_fault;
@@ -810,7 +810,7 @@
             /* page directory entry */
             pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
                 env->a20_mask;
-            pte = ldl_phys(pte_addr);
+            pte = ldul_phys(pte_addr);
             if (!(pte & PG_PRESENT_MASK)) {
                 error_code = 0;
                 goto do_fault;
@@ -910,13 +910,13 @@
 
             pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
                 env->a20_mask;
-            pml4e = ldl_phys(pml4e_addr);
+            pml4e = ldul_phys(pml4e_addr);
             if (!(pml4e & PG_PRESENT_MASK))
                 return -1;
 
             pdpe_addr = ((pml4e & ~0xfff) + (((addr >> 30) & 0x1ff) << 3)) &
                 env->a20_mask;
-            pdpe = ldl_phys(pdpe_addr);
+            pdpe = ldul_phys(pdpe_addr);
             if (!(pdpe & PG_PRESENT_MASK))
                 return -1;
         } else
@@ -924,14 +924,14 @@
         {
             pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) &
                 env->a20_mask;
-            pdpe = ldl_phys(pdpe_addr);
+            pdpe = ldul_phys(pdpe_addr);
             if (!(pdpe & PG_PRESENT_MASK))
                 return -1;
         }
 
         pde_addr = ((pdpe & ~0xfff) + (((addr >> 21) & 0x1ff) << 3)) &
             env->a20_mask;
-        pde = ldl_phys(pde_addr);
+        pde = ldul_phys(pde_addr);
         if (!(pde & PG_PRESENT_MASK)) {
             return -1;
         }
@@ -944,7 +944,7 @@
             pte_addr = ((pde & ~0xfff) + (((addr >> 12) & 0x1ff) << 3)) &
                 env->a20_mask;
             page_size = 4096;
-            pte = ldl_phys(pte_addr);
+            pte = ldul_phys(pte_addr);
         }
     } else {
         if (!(env->cr[0] & CR0_PG_MASK)) {
@@ -953,7 +953,7 @@
         } else {
             /* page directory entry */
             pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask;
-            pde = ldl_phys(pde_addr);
+            pde = ldul_phys(pde_addr);
             if (!(pde & PG_PRESENT_MASK))
                 return -1;
             if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
@@ -962,7 +962,7 @@
             } else {
                 /* page directory entry */
                 pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask;
-                pte = ldl_phys(pte_addr);
+                pte = ldul_phys(pte_addr);
                 if (!(pte & PG_PRESENT_MASK))
                     return -1;
                 page_size = 4096;
Index: target-i386/op.c
===================================================================
--- target-i386/op.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-i386/op.c	2007-10-15 15:41:10.000000000 +0000
@@ -716,8 +716,8 @@
 void OPPROTO op_boundl(void)
 {
     int low, high, v;
-    low = ldl(A0);
-    high = ldl(A0 + 4);
+    low = ldul(A0);
+    high = ldul(A0 + 4);
     v = T0;
     if (v < low || v > high) {
         raise_exception(EXCP05_BOUND);
@@ -747,8 +747,6 @@
 
 /* multiple size ops */
 
-#define ldul ldl
-
 #define SHIFT 0
 #include "ops_template.h"
 #undef SHIFT
@@ -1688,7 +1686,7 @@
 void OPPROTO op_flds_FT0_A0(void)
 {
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = ldl(A0);
+    FP_CONVERT.i32 = ldul(A0);
     FT0 = FP_CONVERT.f;
 #else
     FT0 = ldfl(A0);
@@ -1715,7 +1713,7 @@
 
 void helper_fildl_FT0_A0(void)
 {
-    FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
+    FT0 = (CPU86_LDouble)((int32_t)ldul(A0));
 }
 
 void helper_fildll_FT0_A0(void)
@@ -1753,10 +1751,10 @@
 void OPPROTO op_fildl_FT0_A0(void)
 {
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = (int32_t) ldl(A0);
+    FP_CONVERT.i32 = (int32_t) ldul(A0);
     FT0 = (CPU86_LDouble)FP_CONVERT.i32;
 #else
-    FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
+    FT0 = (CPU86_LDouble)((int32_t)ldul(A0));
 #endif
 }
 
@@ -1778,7 +1776,7 @@
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = ldl(A0);
+    FP_CONVERT.i32 = ldul(A0);
     env->fpregs[new_fpstt].d = FP_CONVERT.f;
 #else
     env->fpregs[new_fpstt].d = ldfl(A0);
@@ -1822,7 +1820,7 @@
 {
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
-    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0));
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldul(A0));
     env->fpstt = new_fpstt;
     env->fptags[new_fpstt] = 0; /* validate stack entry */
 }
@@ -1872,10 +1870,10 @@
     int new_fpstt;
     new_fpstt = (env->fpstt - 1) & 7;
 #ifdef USE_FP_CONVERT
-    FP_CONVERT.i32 = (int32_t) ldl(A0);
+    FP_CONVERT.i32 = (int32_t) ldul(A0);
     env->fpregs[new_fpstt].d = (CPU86_LDouble)FP_CONVERT.i32;
 #else
-    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0));
+    env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldul(A0));
 #endif
     env->fpstt = new_fpstt;
     env->fptags[new_fpstt] = 0; /* validate stack entry */
Index: target-i386/ops_mem.h
===================================================================
--- target-i386/ops_mem.h.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-i386/ops_mem.h	2007-10-15 15:41:10.000000000 +0000
@@ -20,7 +20,7 @@
 
 void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T0_A0)(void)
 {
-    T0 = (uint32_t)glue(ldl, MEMSUFFIX)(A0);
+    T0 = (uint32_t)glue(ldul, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_ldub, MEMSUFFIX), _T1_A0)(void)
@@ -45,7 +45,7 @@
 
 void OPPROTO glue(glue(op_ldl, MEMSUFFIX), _T1_A0)(void)
 {
-    T1 = (uint32_t)glue(ldl, MEMSUFFIX)(A0);
+    T1 = (uint32_t)glue(ldul, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_stb, MEMSUFFIX), _T0_A0)(void)
@@ -122,12 +122,12 @@
 #ifdef TARGET_X86_64
 void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T0_A0)(void)
 {
-    T0 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
+    T0 = glue(ldsl, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_ldsl, MEMSUFFIX), _T1_A0)(void)
 {
-    T1 = (int32_t)glue(ldl, MEMSUFFIX)(A0);
+    T1 = glue(ldsl, MEMSUFFIX)(A0);
 }
 
 void OPPROTO glue(glue(op_ldq, MEMSUFFIX), _T0_A0)(void)
Index: target-i386/svm.h
===================================================================
--- target-i386/svm.h.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-i386/svm.h	2007-10-15 15:41:10.000000000 +0000
@@ -340,13 +340,13 @@
                     R_##seg_index, \
                     lduw_phys(addr + offsetof(struct vmcb, save.seg.selector)),\
                     ldq_phys(addr + offsetof(struct vmcb, save.seg.base)),\
-                    ldl_phys(addr + offsetof(struct vmcb, save.seg.limit)),\
-                    vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg.attrib)), ldq_phys(addr + offsetof(struct vmcb, save.seg.base)), ldl_phys(addr + offsetof(struct vmcb, save.seg.limit))))
+                    ldul_phys(addr + offsetof(struct vmcb, save.seg.limit)),\
+                    vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg.attrib)), ldq_phys(addr + offsetof(struct vmcb, save.seg.base)), ldul_phys(addr + offsetof(struct vmcb, save.seg.limit))))
 
 #define SVM_LOAD_SEG2(addr, seg_qemu, seg_vmcb) \
     env->seg_qemu.selector  = lduw_phys(addr + offsetof(struct vmcb, save.seg_vmcb.selector)); \
     env->seg_qemu.base      = ldq_phys(addr + offsetof(struct vmcb, save.seg_vmcb.base)); \
-    env->seg_qemu.limit     = ldl_phys(addr + offsetof(struct vmcb, save.seg_vmcb.limit)); \
+    env->seg_qemu.limit     = ldul_phys(addr + offsetof(struct vmcb, save.seg_vmcb.limit)); \
     env->seg_qemu.flags     = vmcb2cpu_attrib(lduw_phys(addr + offsetof(struct vmcb, save.seg_vmcb.attrib)), env->seg_qemu.base, env->seg_qemu.limit)
 
 #define SVM_SAVE_SEG(addr, seg_qemu, seg_vmcb) \
Index: target-i386/translate-copy.c
===================================================================
--- target-i386/translate-copy.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-i386/translate-copy.c	2007-10-15 15:41:10.000000000 +0000
@@ -207,7 +207,7 @@
         case 0:
             if (base == 5) {
                 base = -1;
-                disp = ldl_code(s->pc);
+                disp = ldul_code(s->pc);
                 s->pc += 4;
             } else {
                 disp = 0;
@@ -218,7 +218,7 @@
             break;
         default:
         case 2:
-            disp = ldl_code(s->pc);
+            disp = ldul_code(s->pc);
             s->pc += 4;
             break;
         }
@@ -266,7 +266,7 @@
         break;
     default:
     case OT_LONG:
-        ret = ldl_code(s->pc);
+        ret = ldul_code(s->pc);
         s->pc += 4;
         break;
     }
Index: target-i386/translate.c
===================================================================
--- target-i386/translate.c.orig	2007-10-14 16:44:20.000000000 +0000
+++ target-i386/translate.c	2007-10-15 15:41:10.000000000 +0000
@@ -1462,7 +1462,7 @@
         case 0:
             if ((base & 7) == 5) {
                 base = -1;
-                disp = (int32_t)ldl_code(s->pc);
+                disp = (int32_t)ldul_code(s->pc);
                 s->pc += 4;
                 if (CODE64(s) && !havesib) {
                     disp += s->pc + s->rip_offset;
@@ -1476,7 +1476,7 @@
             break;
         default:
         case 2:
-            disp = ldl_code(s->pc);
+            disp = ldul_code(s->pc);
             s->pc += 4;
             break;
         }
@@ -1736,7 +1736,7 @@
         break;
     default:
     case OT_LONG:
-        ret = ldl_code(s->pc);
+        ret = ldul_code(s->pc);
         s->pc += 4;
         break;
     }
Index: target-m68k/exec.h
===================================================================
--- target-m68k/exec.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-m68k/exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -42,6 +42,9 @@
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 void cpu_m68k_flush_flags(CPUM68KState *env, int cc_op);
Index: target-m68k/op_helper.c
===================================================================
--- target-m68k/op_helper.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-m68k/op_helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -33,6 +33,7 @@
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -45,6 +46,21 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
@@ -83,8 +99,8 @@
     uint32_t fmt;
 
     sp = env->aregs[7];
-    fmt = ldl_kernel(sp);
-    env->pc = ldl_kernel(sp + 4);
+    fmt = ldul_kernel(sp);
+    env->pc = ldul_kernel(sp + 4);
     sp |= (fmt >> 28) & 3;
     env->sr = fmt & 0xffff;
     m68k_switch_sp(env);
@@ -112,7 +128,7 @@
                     && (env->sr & SR_S) != 0
                     && (env->pc & 3) == 0
                     && lduw_code(env->pc - 4) == 0x4e71
-                    && ldl_code(env->pc) == 0x4e7bf000) {
+                    && ldul_code(env->pc) == 0x4e7bf000) {
                 env->pc += 4;
                 do_m68k_semihosting(env, env->dregs[0]);
                 return;
@@ -153,7 +169,7 @@
     stl_kernel(sp, fmt);
     env->aregs[7] = sp;
     /* Jump to vector.  */
-    env->pc = ldl_kernel(env->vbr + vector);
+    env->pc = ldul_kernel(env->vbr + vector);
 }
 
 #endif
Index: target-m68k/op_mem.h
===================================================================
--- target-m68k/op_mem.h.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-m68k/op_mem.h	2007-10-15 15:41:10.000000000 +0000
@@ -11,7 +11,7 @@
 MEM_LD_OP(8s32,sb)
 MEM_LD_OP(16u32,uw)
 MEM_LD_OP(16s32,sw)
-MEM_LD_OP(32,l)
+MEM_LD_OP(32,ul)
 
 #undef MEM_LD_OP
 
Index: target-mips/exec.h
===================================================================
--- target-mips/exec.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-mips/exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -54,6 +54,9 @@
 
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 #if defined(TARGET_MIPSN32) || defined(TARGET_MIPS64)
Index: target-mips/op_helper.c
===================================================================
--- target-mips/op_helper.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-mips/op_helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -544,6 +544,7 @@
 #define MMUSUFFIX _mmu
 #define ALIGNED_ONLY
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -556,6 +557,21 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 static void do_unaligned_access (target_ulong addr, int is_write, int is_user, void *retaddr)
 {
     env->CP0_BadVAddr = addr;
Index: target-mips/op_mem.c
===================================================================
--- target-mips/op_mem.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-mips/op_mem.c	2007-10-15 15:41:10.000000000 +0000
@@ -57,13 +57,13 @@
 
 void glue(op_lw, MEMSUFFIX) (void)
 {
-    T0 = glue(ldl, MEMSUFFIX)(T0);
+    T0 = glue(ldul, MEMSUFFIX)(T0);
     RETURN();
 }
 
 void glue(op_lwu, MEMSUFFIX) (void)
 {
-    T0 = (uint32_t)glue(ldl, MEMSUFFIX)(T0);
+    T0 = (uint32_t)glue(ldul, MEMSUFFIX)(T0);
     RETURN();
 }
 
@@ -167,7 +167,7 @@
 void glue(op_ll, MEMSUFFIX) (void)
 {
     T1 = T0;
-    T0 = glue(ldl, MEMSUFFIX)(T0);
+    T0 = glue(ldul, MEMSUFFIX)(T0);
     env->CP0_LLAddr = T1;
     RETURN();
 }
@@ -383,7 +383,7 @@
 
 void glue(op_lwc1, MEMSUFFIX) (void)
 {
-    WT0 = glue(ldl, MEMSUFFIX)(T0);
+    WT0 = glue(ldul, MEMSUFFIX)(T0);
     RETURN();
 }
 void glue(op_swc1, MEMSUFFIX) (void)
Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c.orig	2007-10-14 16:44:20.000000000 +0000
+++ target-mips/translate.c	2007-10-15 15:41:10.000000000 +0000
@@ -6544,7 +6544,7 @@
             gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK;
             gen_opc_instr_start[lj] = 1;
         }
-        ctx.opcode = ldl_code(ctx.pc);
+        ctx.opcode = ldul_code(ctx.pc);
         decode_opc(env, &ctx);
         ctx.pc += 4;
 
Index: target-ppc/exec.h
===================================================================
--- target-ppc/exec.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-ppc/exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -91,7 +91,12 @@
 #endif
 
 #if !defined(CONFIG_USER_ONLY)
+
+#include "softmmu_exec.h"
+#define REVERSE_ENDIAN
 #include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
+
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 void do_raise_exception_err (uint32_t exception, int error_code);
Index: target-ppc/helper.c
===================================================================
--- target-ppc/helper.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-ppc/helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -569,8 +569,8 @@
         } else
 #endif
         {
-            pte0 = ldl_phys(base + (i * 8));
-            pte1 =  ldl_phys(base + (i * 8) + 4);
+            pte0 = ldul_phys(base + (i * 8));
+            pte1 =  ldul_phys(base + (i * 8) + 4);
             r = pte32_check(ctx, pte0, pte1, h, rw, type);
 #if defined (DEBUG_MMU)
             if (loglevel != 0) {
@@ -686,7 +686,7 @@
     mask = 0x0000000000000000ULL; /* Avoid gcc warning */
     for (n = 0; n < env->slb_nr; n++) {
         tmp64 = ldq_phys(sr_base);
-        tmp = ldl_phys(sr_base + 8);
+        tmp = ldul_phys(sr_base + 8);
 #if defined(DEBUG_SLB)
         if (loglevel != 0) {
             fprintf(logfile, "%s: seg %d " PADDRX " %016" PRIx64 " %08"
@@ -784,7 +784,7 @@
     sr_base = env->spr[SPR_ASR];
     sr_base += 12 * slb_nr;
     tmp64 = ldq_phys(sr_base);
-    tmp = ldl_phys(sr_base + 8);
+    tmp = ldul_phys(sr_base + 8);
     if (tmp64 & 0x0000000008000000ULL) {
         /* SLB entry is valid */
         /* Copy SLB bits 62:88 to Rt 37:63 (VSID 23:49) */
@@ -990,10 +990,10 @@
                         sdr, mask + 0x80);
                 for (curaddr = sdr; curaddr < (sdr + mask + 0x80);
                      curaddr += 16) {
-                    a0 = ldl_phys(curaddr);
-                    a1 = ldl_phys(curaddr + 4);
-                    a2 = ldl_phys(curaddr + 8);
-                    a3 = ldl_phys(curaddr + 12);
+                    a0 = ldul_phys(curaddr);
+                    a1 = ldul_phys(curaddr + 4);
+                    a2 = ldul_phys(curaddr + 8);
+                    a3 = ldul_phys(curaddr + 12);
                     if (a0 != 0 || a1 != 0 || a2 != 0 || a3 != 0) {
                         fprintf(logfile,
                                 PADDRX ": %08x %08x %08x %08x\n",
@@ -2266,7 +2266,7 @@
 #endif
         /* XXX: this is false */
         /* Get rS/rD and rA from faulting opcode */
-        env->spr[SPR_DSISR] |= (ldl_code((env->nip - 4)) & 0x03FF0000) >> 16;
+        env->spr[SPR_DSISR] |= (ldul_code((env->nip - 4)) & 0x03FF0000) >> 16;
         goto store_current;
     case POWERPC_EXCP_PROGRAM:   /* Program exception                        */
         switch (env->error_code & ~0xF) {
Index: target-ppc/op_helper.c
===================================================================
--- target-ppc/op_helper.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-ppc/op_helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -2296,6 +2296,7 @@
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -2308,6 +2309,21 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
Index: target-ppc/op_helper.h
===================================================================
--- target-ppc/op_helper.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-ppc/op_helper.h	2007-10-15 15:41:10.000000000 +0000
@@ -37,19 +37,6 @@
 void glue(do_POWER2_stfq, MEMSUFFIX) (void);
 void glue(do_POWER2_stfq_le, MEMSUFFIX) (void);
 
-#if defined(TARGET_PPC64)
-void glue(do_lsw_64, MEMSUFFIX) (int dst);
-void glue(do_lsw_le_64, MEMSUFFIX) (int dst);
-void glue(do_stsw_64, MEMSUFFIX) (int src);
-void glue(do_stsw_le_64, MEMSUFFIX) (int src);
-void glue(do_lmw_64, MEMSUFFIX) (int dst);
-void glue(do_lmw_le_64, MEMSUFFIX) (int dst);
-void glue(do_stmw_64, MEMSUFFIX) (int src);
-void glue(do_stmw_le_64, MEMSUFFIX) (int src);
-void glue(do_icbi_64, MEMSUFFIX) (void);
-void glue(do_dcbz_64, MEMSUFFIX) (void);
-#endif
-
 #else
 
 void do_print_mem_EA (target_ulong EA);
Index: target-ppc/op_helper_mem.h
===================================================================
--- target-ppc/op_helper_mem.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-ppc/op_helper_mem.h	2007-10-15 15:41:10.000000000 +0000
@@ -19,229 +19,101 @@
  */
 
 /* Multiple word / string load and store */
-static always_inline target_ulong glue(ld32r, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF000000UL) >> 24) | ((tmp & 0x00FF0000UL) >> 8) |
-        ((tmp & 0x0000FF00UL) << 8) | ((tmp & 0x000000FFUL) << 24);
-}
-
-static always_inline void glue(st32r, MEMSUFFIX) (target_ulong EA,
-                                                  target_ulong data)
-{
-    uint32_t tmp =
-        ((data & 0xFF000000UL) >> 24) | ((data & 0x00FF0000UL) >> 8) |
-        ((data & 0x0000FF00UL) << 8) | ((data & 0x000000FFUL) << 24);
-    glue(stl, MEMSUFFIX)(EA, tmp);
-}
-
 void glue(do_lmw, MEMSUFFIX) (int dst)
 {
     for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ldl, MEMSUFFIX)((uint32_t)T0);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lmw_64, MEMSUFFIX) (int dst)
-{
-    for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst] = glue(ldul, MEMSUFFIX)(T0);
     }
 }
-#endif
 
 void glue(do_stmw, MEMSUFFIX) (int src)
 {
     for (; src < 32; src++, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint32_t)T0, env->gpr[src]);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stmw_64, MEMSUFFIX) (int src)
-{
-    for (; src < 32; src++, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint64_t)T0, env->gpr[src]);
+        glue(stl, MEMSUFFIX)(T0, env->gpr[src]);
     }
 }
-#endif
 
 void glue(do_lmw_le, MEMSUFFIX) (int dst)
 {
     for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_lmw_le_64, MEMSUFFIX) (int dst)
-{
-    for (; dst < 32; dst++, T0 += 4) {
-        env->gpr[dst] = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        env->gpr[dst] = glue(ldulr, MEMSUFFIX)(T0);
     }
 }
-#endif
 
 void glue(do_stmw_le, MEMSUFFIX) (int src)
 {
     for (; src < 32; src++, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint32_t)T0, env->gpr[src]);
+        glue(stlr, MEMSUFFIX)(T0, env->gpr[src]);
     }
 }
 
-#if defined(TARGET_PPC64)
-void glue(do_stmw_le_64, MEMSUFFIX) (int src)
-{
-    for (; src < 32; src++, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint64_t)T0, env->gpr[src]);
-    }
-}
-#endif
-
 void glue(do_lsw, MEMSUFFIX) (int dst)
 {
     uint32_t tmp;
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ldl, MEMSUFFIX)((uint32_t)T0);
+        env->gpr[dst++] = glue(ldul, MEMSUFFIX)(T0);
         if (unlikely(dst == 32))
             dst = 0;
     }
     if (unlikely(T1 != 0)) {
         tmp = 0;
         for (sh = 24; T1 > 0; T1--, T0++, sh -= 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint32_t)T0) << sh;
+            tmp |= glue(ldub, MEMSUFFIX)(T0) << sh;
         }
         env->gpr[dst] = tmp;
     }
 }
 
-#if defined(TARGET_PPC64)
-void glue(do_lsw_64, MEMSUFFIX) (int dst)
-{
-    uint32_t tmp;
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ldl, MEMSUFFIX)((uint64_t)T0);
-        if (unlikely(dst == 32))
-            dst = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        tmp = 0;
-        for (sh = 24; T1 > 0; T1--, T0++, sh -= 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint64_t)T0) << sh;
-        }
-        env->gpr[dst] = tmp;
-    }
-}
-#endif
-
 void glue(do_stsw, MEMSUFFIX) (int src)
 {
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint32_t)T0, env->gpr[src++]);
+        glue(stl, MEMSUFFIX)(T0, env->gpr[src++]);
         if (unlikely(src == 32))
             src = 0;
     }
     if (unlikely(T1 != 0)) {
         for (sh = 24; T1 > 0; T1--, T0++, sh -= 8)
-            glue(stb, MEMSUFFIX)((uint32_t)T0, (env->gpr[src] >> sh) & 0xFF);
+            glue(stb, MEMSUFFIX)(T0, (env->gpr[src] >> sh) & 0xFF);
     }
 }
 
-#if defined(TARGET_PPC64)
-void glue(do_stsw_64, MEMSUFFIX) (int src)
-{
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(stl, MEMSUFFIX)((uint64_t)T0, env->gpr[src++]);
-        if (unlikely(src == 32))
-            src = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        for (sh = 24; T1 > 0; T1--, T0++, sh -= 8)
-            glue(stb, MEMSUFFIX)((uint64_t)T0, (env->gpr[src] >> sh) & 0xFF);
-    }
-}
-#endif
-
 void glue(do_lsw_le, MEMSUFFIX) (int dst)
 {
     uint32_t tmp;
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
+        env->gpr[dst++] = glue(ldulr, MEMSUFFIX)(T0);
         if (unlikely(dst == 32))
             dst = 0;
     }
     if (unlikely(T1 != 0)) {
         tmp = 0;
         for (sh = 0; T1 > 0; T1--, T0++, sh += 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint32_t)T0) << sh;
+            tmp |= glue(ldub, MEMSUFFIX)(T0) << sh;
         }
         env->gpr[dst] = tmp;
     }
 }
 
-#if defined(TARGET_PPC64)
-void glue(do_lsw_le_64, MEMSUFFIX) (int dst)
-{
-    uint32_t tmp;
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        env->gpr[dst++] = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
-        if (unlikely(dst == 32))
-            dst = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        tmp = 0;
-        for (sh = 0; T1 > 0; T1--, T0++, sh += 8) {
-            tmp |= glue(ldub, MEMSUFFIX)((uint64_t)T0) << sh;
-        }
-        env->gpr[dst] = tmp;
-    }
-}
-#endif
-
 void glue(do_stsw_le, MEMSUFFIX) (int src)
 {
     int sh;
 
     for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint32_t)T0, env->gpr[src++]);
-        if (unlikely(src == 32))
-            src = 0;
-    }
-    if (unlikely(T1 != 0)) {
-        for (sh = 0; T1 > 0; T1--, T0++, sh += 8)
-            glue(stb, MEMSUFFIX)((uint32_t)T0, (env->gpr[src] >> sh) & 0xFF);
-    }
-}
-
-#if defined(TARGET_PPC64)
-void glue(do_stsw_le_64, MEMSUFFIX) (int src)
-{
-    int sh;
-
-    for (; T1 > 3; T1 -= 4, T0 += 4) {
-        glue(st32r, MEMSUFFIX)((uint64_t)T0, env->gpr[src++]);
+        glue(stlr, MEMSUFFIX)(T0, env->gpr[src++]);
         if (unlikely(src == 32))
             src = 0;
     }
     if (unlikely(T1 != 0)) {
         for (sh = 0; T1 > 0; T1--, T0++, sh += 8)
-            glue(stb, MEMSUFFIX)((uint64_t)T0, (env->gpr[src] >> sh) & 0xFF);
+            glue(stb, MEMSUFFIX)(T0, (env->gpr[src] >> sh) & 0xFF);
     }
 }
-#endif
 
 /* Instruction cache invalidation helper */
 void glue(do_icbi, MEMSUFFIX) (void)
@@ -252,28 +124,12 @@
      * (not a fetch) by the MMU. To be sure it will be so,
      * do the load "by hand".
      */
-    tmp = glue(ldl, MEMSUFFIX)((uint32_t)T0);
+    tmp = glue(ldul, MEMSUFFIX)(T0);
     T0 &= ~(env->icache_line_size - 1);
-    tb_invalidate_page_range((uint32_t)T0,
-                             (uint32_t)(T0 + env->icache_line_size));
+    /* We assume the range does not wrap around 2^32 on 32-bit targets */
+    tb_invalidate_page_range(T0, T0 + env->icache_line_size);
 }
 
-#if defined(TARGET_PPC64)
-void glue(do_icbi_64, MEMSUFFIX) (void)
-{
-    uint64_t tmp;
-    /* Invalidate one cache line :
-     * PowerPC specification says this is to be treated like a load
-     * (not a fetch) by the MMU. To be sure it will be so,
-     * do the load "by hand".
-     */
-    tmp = glue(ldq, MEMSUFFIX)((uint64_t)T0);
-    T0 &= ~(env->icache_line_size - 1);
-    tb_invalidate_page_range((uint64_t)T0,
-                             (uint64_t)(T0 + env->icache_line_size));
-}
-#endif
-
 void glue(do_dcbz, MEMSUFFIX) (void)
 {
     int dcache_line_size = env->dcache_line_size;
@@ -281,91 +137,44 @@
     /* XXX: should be 970 specific (?) */
     if (((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1)
         dcache_line_size = 32;
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x00), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x04), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x08), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x0C), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x10), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x14), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x18), 0);
-    glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x1C), 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x00, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x04, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x08, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x0C, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x10, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x14, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x18, 0);
+    glue(stl, MEMSUFFIX)(T0 + 0x1C, 0);
     if (dcache_line_size >= 64) {
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x20UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x24UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x28UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x2CUL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x30UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x34UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x38UL), 0);
-        glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x3CUL), 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x20UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x24UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x28UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x2CUL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x30UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x34UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x38UL, 0);
+        glue(stl, MEMSUFFIX)(T0 + 0x3CUL, 0);
         if (dcache_line_size >= 128) {
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x40UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x44UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x48UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x4CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x50UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x54UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x58UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x5CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x60UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x64UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x68UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x6CUL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x70UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x74UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x78UL), 0);
-            glue(stl, MEMSUFFIX)((uint32_t)(T0 + 0x7CUL), 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x40UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x44UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x48UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x4CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x50UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x54UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x58UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x5CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x60UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x64UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x68UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x6CUL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x70UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x74UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x78UL, 0);
+            glue(stl, MEMSUFFIX)(T0 + 0x7CUL, 0);
         }
     }
 }
 
-#if defined(TARGET_PPC64)
-void glue(do_dcbz_64, MEMSUFFIX) (void)
-{
-    int dcache_line_size = env->dcache_line_size;
-
-    /* XXX: should be 970 specific (?) */
-    if (((env->spr[SPR_970_HID5] >> 6) & 0x3) == 0x2)
-        dcache_line_size = 32;
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x00), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x04), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x08), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x0C), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x10), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x14), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x18), 0);
-    glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x1C), 0);
-    if (dcache_line_size >= 64) {
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x20UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x24UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x28UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x2CUL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x30UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x34UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x38UL), 0);
-        glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x3CUL), 0);
-        if (dcache_line_size >= 128) {
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x40UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x44UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x48UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x4CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x50UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x54UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x58UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x5CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x60UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x64UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x68UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x6CUL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x70UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x74UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x78UL), 0);
-            glue(stl, MEMSUFFIX)((uint64_t)(T0 + 0x7CUL), 0);
-        }
-    }
-}
-#endif
-
 /* PowerPC 601 specific instructions (POWER bridge) */
 // XXX: to be tested
 void glue(do_POWER_lscbx, MEMSUFFIX) (int dest, int ra, int rb)
@@ -400,26 +209,6 @@
     FT1 = glue(ldfq, MEMSUFFIX)((uint32_t)(T0 + 4));
 }
 
-static always_inline double glue(ldfqr, MEMSUFFIX) (target_ulong EA)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = glue(ldfq, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-
-    return u.d;
-}
-
 void glue(do_POWER2_lfq_le, MEMSUFFIX) (void)
 {
     FT0 = glue(ldfqr, MEMSUFFIX)((uint32_t)(T0 + 4));
@@ -432,25 +221,6 @@
     glue(stfq, MEMSUFFIX)((uint32_t)(T0 + 4), FT1);
 }
 
-static always_inline void glue(stfqr, MEMSUFFIX) (target_ulong EA, double d)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = d;
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-    glue(stfq, MEMSUFFIX)(EA, u.d);
-}
-
 void glue(do_POWER2_stfq_le, MEMSUFFIX) (void)
 {
     glue(stfqr, MEMSUFFIX)((uint32_t)(T0 + 4), FT0);
Index: target-ppc/op_mem.h
===================================================================
--- target-ppc/op_mem.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-ppc/op_mem.h	2007-10-15 15:41:10.000000000 +0000
@@ -18,85 +18,6 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-static always_inline uint16_t glue(ld16r, MEMSUFFIX) (target_ulong EA)
-{
-    uint16_t tmp = glue(lduw, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF00) >> 8) | ((tmp & 0x00FF) << 8);
-}
-
-static always_inline int32_t glue(ld16rs, MEMSUFFIX) (target_ulong EA)
-{
-    int16_t tmp = glue(lduw, MEMSUFFIX)(EA);
-    return (int16_t)((tmp & 0xFF00) >> 8) | ((tmp & 0x00FF) << 8);
-}
-
-static always_inline uint32_t glue(ld32r, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF000000) >> 24) | ((tmp & 0x00FF0000) >> 8) |
-        ((tmp & 0x0000FF00) << 8) | ((tmp & 0x000000FF) << 24);
-}
-
-#if defined(TARGET_PPC64) || defined(TARGET_PPCEMB)
-static always_inline uint64_t glue(ld64r, MEMSUFFIX) (target_ulong EA)
-{
-    uint64_t tmp = glue(ldq, MEMSUFFIX)(EA);
-    return ((tmp & 0xFF00000000000000ULL) >> 56) |
-        ((tmp & 0x00FF000000000000ULL) >> 40) |
-        ((tmp & 0x0000FF0000000000ULL) >> 24) |
-        ((tmp & 0x000000FF00000000ULL) >> 8) |
-        ((tmp & 0x00000000FF000000ULL) << 8) |
-        ((tmp & 0x0000000000FF0000ULL) << 24) |
-        ((tmp & 0x000000000000FF00ULL) << 40) |
-        ((tmp & 0x00000000000000FFULL) << 54);
-}
-#endif
-
-#if defined(TARGET_PPC64)
-static always_inline int64_t glue(ldsl, MEMSUFFIX) (target_ulong EA)
-{
-    return (int32_t)glue(ldl, MEMSUFFIX)(EA);
-}
-
-static always_inline int64_t glue(ld32rs, MEMSUFFIX) (target_ulong EA)
-{
-    uint32_t tmp = glue(ldl, MEMSUFFIX)(EA);
-    return (int32_t)((tmp & 0xFF000000) >> 24) | ((tmp & 0x00FF0000) >> 8) |
-        ((tmp & 0x0000FF00) << 8) | ((tmp & 0x000000FF) << 24);
-}
-#endif
-
-static always_inline void glue(st16r, MEMSUFFIX) (target_ulong EA,
-                                                  uint16_t data)
-{
-    uint16_t tmp = ((data & 0xFF00) >> 8) | ((data & 0x00FF) << 8);
-    glue(stw, MEMSUFFIX)(EA, tmp);
-}
-
-static always_inline void glue(st32r, MEMSUFFIX) (target_ulong EA,
-                                                  uint32_t data)
-{
-    uint32_t tmp = ((data & 0xFF000000) >> 24) | ((data & 0x00FF0000) >> 8) |
-        ((data & 0x0000FF00) << 8) | ((data & 0x000000FF) << 24);
-    glue(stl, MEMSUFFIX)(EA, tmp);
-}
-
-#if defined(TARGET_PPC64) || defined(TARGET_PPCEMB)
-static always_inline void glue(st64r, MEMSUFFIX) (target_ulong EA,
-                                                  uint64_t data)
-{
-    uint64_t tmp = ((data & 0xFF00000000000000ULL) >> 56) |
-        ((data & 0x00FF000000000000ULL) >> 40) |
-        ((data & 0x0000FF0000000000ULL) >> 24) |
-        ((data & 0x000000FF00000000ULL) >> 8) |
-        ((data & 0x00000000FF000000ULL) << 8) |
-        ((data & 0x0000000000FF0000ULL) << 24) |
-        ((data & 0x000000000000FF00ULL) << 40) |
-        ((data & 0x00000000000000FFULL) << 56);
-    glue(stq, MEMSUFFIX)(EA, tmp);
-}
-#endif
-
 /***                             Integer load                              ***/
 #define PPC_LD_OP(name, op)                                                   \
 void OPPROTO glue(glue(op_l, name), MEMSUFFIX) (void)                         \
@@ -130,10 +51,11 @@
 }
 #endif
 
+/* Native-endian fixed-point memory loads                                    */
 PPC_LD_OP(bz, ldub);
 PPC_LD_OP(ha, ldsw);
 PPC_LD_OP(hz, lduw);
-PPC_LD_OP(wz, ldl);
+PPC_LD_OP(wz, ldul);
 #if defined(TARGET_PPC64)
 PPC_LD_OP(d, ldq);
 PPC_LD_OP(wa, ldsl);
@@ -142,23 +64,24 @@
 PPC_LD_OP_64(bz, ldub);
 PPC_LD_OP_64(ha, ldsw);
 PPC_LD_OP_64(hz, lduw);
-PPC_LD_OP_64(wz, ldl);
+PPC_LD_OP_64(wz, ldul);
 #endif
 
-PPC_LD_OP(ha_le, ld16rs);
-PPC_LD_OP(hz_le, ld16r);
-PPC_LD_OP(wz_le, ld32r);
+/* Reverse-endian fixed-point memory loads                                   */
+PPC_LD_OP(ha_le, ldswr);
+PPC_LD_OP(hz_le, lduwr);
+PPC_LD_OP(wz_le, ldulr);
 #if defined(TARGET_PPC64)
-PPC_LD_OP(d_le, ld64r);
-PPC_LD_OP(wa_le, ld32rs);
-PPC_LD_OP_64(d_le, ld64r);
-PPC_LD_OP_64(wa_le, ld32rs);
-PPC_LD_OP_64(ha_le, ld16rs);
-PPC_LD_OP_64(hz_le, ld16r);
-PPC_LD_OP_64(wz_le, ld32r);
+PPC_LD_OP(d_le, ldqr);
+PPC_LD_OP(wa_le, ldslr);
+PPC_LD_OP_64(d_le, ldqr);
+PPC_LD_OP_64(wa_le, ldslr);
+PPC_LD_OP_64(ha_le, ldswr);
+PPC_LD_OP_64(hz_le, lduwr);
+PPC_LD_OP_64(wz_le, ldulr);
 #endif
 
-/***                              Integer store                            ***/
+/* Native-endian fixed-point memory stores                                   */
 PPC_ST_OP(b, stb);
 PPC_ST_OP(h, stw);
 PPC_ST_OP(w, stl);
@@ -170,120 +93,110 @@
 PPC_ST_OP_64(w, stl);
 #endif
 
-PPC_ST_OP(h_le, st16r);
-PPC_ST_OP(w_le, st32r);
+/* Reverse-endian fixed-point memory stores                                  */
+PPC_ST_OP(h_le, stwr);
+PPC_ST_OP(w_le, stlr);
+#if defined(TARGET_PPC64)
+PPC_ST_OP(d_le, stqr);
+PPC_ST_OP_64(d_le, stqr);
+PPC_ST_OP_64(h_le, stwr);
+PPC_ST_OP_64(w_le, stlr);
+#endif
+
+/* Native-endian fixed-point loads and stores with byte-reverse              */
+PPC_LD_OP(hbr, lduwr);
+PPC_LD_OP(wbr, ldulr);
+PPC_ST_OP(hbr, stwr);
+PPC_ST_OP(wbr, stlr);
 #if defined(TARGET_PPC64)
-PPC_ST_OP(d_le, st64r);
-PPC_ST_OP_64(d_le, st64r);
-PPC_ST_OP_64(h_le, st16r);
-PPC_ST_OP_64(w_le, st32r);
-#endif
-
-/***                Integer load and store with byte reverse               ***/
-PPC_LD_OP(hbr, ld16r);
-PPC_LD_OP(wbr, ld32r);
-PPC_ST_OP(hbr, st16r);
-PPC_ST_OP(wbr, st32r);
-#if defined(TARGET_PPC64)
-PPC_LD_OP_64(hbr, ld16r);
-PPC_LD_OP_64(wbr, ld32r);
-PPC_ST_OP_64(hbr, st16r);
-PPC_ST_OP_64(wbr, st32r);
+PPC_LD_OP_64(hbr, lduwr);
+PPC_LD_OP_64(wbr, ldulr);
+PPC_ST_OP_64(hbr, stwr);
+PPC_ST_OP_64(wbr, stlr);
 #endif
 
+/* Reverse-endian fixed-point loads and stores with byte-reverse             */
 PPC_LD_OP(hbr_le, lduw);
-PPC_LD_OP(wbr_le, ldl);
+PPC_LD_OP(wbr_le, ldul);
 PPC_ST_OP(hbr_le, stw);
 PPC_ST_OP(wbr_le, stl);
 #if defined(TARGET_PPC64)
 PPC_LD_OP_64(hbr_le, lduw);
-PPC_LD_OP_64(wbr_le, ldl);
+PPC_LD_OP_64(wbr_le, ldul);
 PPC_ST_OP_64(hbr_le, stw);
 PPC_ST_OP_64(wbr_le, stl);
 #endif
 
-/***                    Integer load and store multiple                    ***/
+/* Native-endian fixed-point loads and stores multiple                       */
 void OPPROTO glue(op_lmw, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_lmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_lmw_64, MEMSUFFIX) (void)
-{
-    glue(do_lmw_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
-void OPPROTO glue(op_lmw_le, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw, MEMSUFFIX) (void)
 {
-    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_stmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_lmw_le_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_lmw_64, MEMSUFFIX) (void)
 {
-    glue(do_lmw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_lmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
-#endif
 
-void OPPROTO glue(op_stmw, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw_64, MEMSUFFIX) (void)
 {
     glue(do_stmw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
+#endif
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_stmw_64, MEMSUFFIX) (void)
+/* Reverse-endian fixed-point loads and stores multiple                      */
+void OPPROTO glue(op_lmw_le, MEMSUFFIX) (void)
 {
-    glue(do_stmw_64, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0;
+    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
-#endif
 
 void OPPROTO glue(op_stmw_le, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_stmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_stmw_le_64, MEMSUFFIX) (void)
-{
-    glue(do_stmw_le_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
-/***                    Integer load and store strings                     ***/
-void OPPROTO glue(op_lswi, MEMSUFFIX) (void)
+void OPPROTO glue(op_lmw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw, MEMSUFFIX)(PARAM1);
+    glue(do_lmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_lswi_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_stmw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw_64, MEMSUFFIX)(PARAM1);
+    glue(do_stmw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
 
-void OPPROTO glue(op_lswi_le, MEMSUFFIX) (void)
+/* Native-endian loads and stores string                                     */
+void OPPROTO glue(op_lswi, MEMSUFFIX) (void)
 {
-    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    T0 = (uint32_t)T0; /* truncate T0 as the other non-_64 ops do */
+    glue(do_lsw, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
-void OPPROTO glue(op_lswi_le_64, MEMSUFFIX) (void)
+void OPPROTO glue(op_lswi_64, MEMSUFFIX) (void)
 {
-    glue(do_lsw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_lsw, MEMSUFFIX)(PARAM1); /* _64 variant: T0 not truncated */
     RETURN();
 }
 #endif
@@ -303,6 +216,7 @@
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
+            T0 = (uint32_t)T0;
             glue(do_lsw, MEMSUFFIX)(PARAM1);
         }
     }
@@ -320,13 +234,44 @@
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
-            glue(do_lsw_64, MEMSUFFIX)(PARAM1);
+            glue(do_lsw, MEMSUFFIX)(PARAM1);
         }
     }
     RETURN();
 }
 #endif
 
+void OPPROTO glue(op_stsw, MEMSUFFIX) (void)
+{
+    T0 = (uint32_t)T0;
+    glue(do_stsw, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+
+#if defined(TARGET_PPC64)
+void OPPROTO glue(op_stsw_64, MEMSUFFIX) (void)
+{
+    glue(do_stsw, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+#endif
+
+/* Reverse-endian loads and stores string                                    */
+void OPPROTO glue(op_lswi_le, MEMSUFFIX) (void)
+{
+    T0 = (uint32_t)T0;
+    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+
+#if defined(TARGET_PPC64)
+void OPPROTO glue(op_lswi_le_64, MEMSUFFIX) (void)
+{
+    glue(do_lsw_le, MEMSUFFIX)(PARAM1);
+    RETURN();
+}
+#endif
+
 void OPPROTO glue(op_lswx_le, MEMSUFFIX) (void)
 {
     /* Note: T1 comes from xer_bc then no cast is needed */
@@ -337,6 +282,7 @@
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
+            T0 = (uint32_t)T0;
             glue(do_lsw_le, MEMSUFFIX)(PARAM1);
         }
     }
@@ -354,29 +300,16 @@
                                    POWERPC_EXCP_INVAL |
                                    POWERPC_EXCP_INVAL_LSWX);
         } else {
-            glue(do_lsw_le_64, MEMSUFFIX)(PARAM1);
+            glue(do_lsw_le, MEMSUFFIX)(PARAM1);
         }
     }
     RETURN();
 }
 #endif
 
-void OPPROTO glue(op_stsw, MEMSUFFIX) (void)
-{
-    glue(do_stsw, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-
-#if defined(TARGET_PPC64)
-void OPPROTO glue(op_stsw_64, MEMSUFFIX) (void)
-{
-    glue(do_stsw_64, MEMSUFFIX)(PARAM1);
-    RETURN();
-}
-#endif
-
 void OPPROTO glue(op_stsw_le, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_stsw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
@@ -384,7 +317,7 @@
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_stsw_le_64, MEMSUFFIX) (void)
 {
-    glue(do_stsw_le_64, MEMSUFFIX)(PARAM1);
+    glue(do_stsw_le, MEMSUFFIX)(PARAM1);
     RETURN();
 }
 #endif
@@ -432,38 +365,9 @@
 PPC_STF_OP_64(fiwx, stfiwx);
 #endif
 
-static always_inline void glue(stfqr, MEMSUFFIX) (target_ulong EA, double d)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = d;
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-    glue(stfq, MEMSUFFIX)(EA, u.d);
-}
-
 static always_inline void glue(stfsr, MEMSUFFIX) (target_ulong EA, double d)
 {
-    union {
-        float f;
-        uint32_t u;
-    } u;
-
-    u.f = float64_to_float32(d, &env->fp_status);
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-    glue(stfl, MEMSUFFIX)(EA, u.f);
+    glue(stflr, MEMSUFFIX)(EA, float64_to_float32(d, &env->fp_status));
 }
 
 static always_inline void glue(stfiwxr, MEMSUFFIX) (target_ulong EA, double d)
@@ -475,11 +379,7 @@
 
     /* Store the low order 32 bits without any conversion */
     u.d = d;
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-    glue(stl, MEMSUFFIX)(EA, u.u);
+    glue(stlr, MEMSUFFIX)(EA, u.u);
 }
 
 PPC_STF_OP(fd_le, stfqr);
@@ -520,40 +420,9 @@
 PPC_LDF_OP_64(fs, ldfs);
 #endif
 
-static always_inline double glue(ldfqr, MEMSUFFIX) (target_ulong EA)
-{
-    union {
-        double d;
-        uint64_t u;
-    } u;
-
-    u.d = glue(ldfq, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF00000000000000ULL) >> 56) |
-        ((u.u & 0x00FF000000000000ULL) >> 40) |
-        ((u.u & 0x0000FF0000000000ULL) >> 24) |
-        ((u.u & 0x000000FF00000000ULL) >> 8) |
-        ((u.u & 0x00000000FF000000ULL) << 8) |
-        ((u.u & 0x0000000000FF0000ULL) << 24) |
-        ((u.u & 0x000000000000FF00ULL) << 40) |
-        ((u.u & 0x00000000000000FFULL) << 56);
-
-    return u.d;
-}
-
 static always_inline double glue(ldfsr, MEMSUFFIX) (target_ulong EA)
 {
-    union {
-        float f;
-        uint32_t u;
-    } u;
-
-    u.f = glue(ldfl, MEMSUFFIX)(EA);
-    u.u = ((u.u & 0xFF000000UL) >> 24) |
-        ((u.u & 0x00FF0000ULL) >> 8) |
-        ((u.u & 0x0000FF00UL) << 8) |
-        ((u.u & 0x000000FFULL) << 24);
-
-    return float32_to_float64(u.f, &env->fp_status);
+    return float32_to_float64(glue(ldflr, MEMSUFFIX)(EA), &env->fp_status);
 }
 
 PPC_LDF_OP(fd_le, ldfqr);
@@ -569,7 +438,7 @@
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ldl, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldul, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -581,7 +450,7 @@
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldul, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -615,7 +484,7 @@
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldulr, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -627,7 +496,7 @@
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldulr, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -638,7 +507,7 @@
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld64r, MEMSUFFIX)((uint32_t)T0);
+        T1 = glue(ldqr, MEMSUFFIX)((uint32_t)T0);
         env->reserve = (uint32_t)T0;
     }
     RETURN();
@@ -649,7 +518,7 @@
     if (unlikely(T0 & 0x03)) {
         do_raise_exception(POWERPC_EXCP_ALIGN);
     } else {
-        T1 = glue(ld64r, MEMSUFFIX)((uint64_t)T0);
+        T1 = glue(ldqr, MEMSUFFIX)((uint64_t)T0);
         env->reserve = (uint64_t)T0;
     }
     RETURN();
@@ -731,7 +600,7 @@
         if (unlikely(env->reserve != (uint32_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st32r, MEMSUFFIX)((uint32_t)T0, T1);
+            glue(stlr, MEMSUFFIX)((uint32_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -748,7 +617,7 @@
         if (unlikely(env->reserve != (uint64_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st32r, MEMSUFFIX)((uint64_t)T0, T1);
+            glue(stlr, MEMSUFFIX)((uint64_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -764,7 +633,7 @@
         if (unlikely(env->reserve != (uint32_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st64r, MEMSUFFIX)((uint32_t)T0, T1);
+            glue(stqr, MEMSUFFIX)((uint32_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -780,7 +649,7 @@
         if (unlikely(env->reserve != (uint64_t)T0)) {
             env->crf[0] = xer_so;
         } else {
-            glue(st64r, MEMSUFFIX)((uint64_t)T0, T1);
+            glue(stqr, MEMSUFFIX)((uint64_t)T0, T1);
             env->crf[0] = xer_so | 0x02;
         }
     }
@@ -862,6 +731,7 @@
 
 void OPPROTO glue(op_dcbz, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_dcbz, MEMSUFFIX)();
     RETURN();
 }
@@ -940,7 +810,7 @@
 
 void OPPROTO glue(op_dcbz_64, MEMSUFFIX) (void)
 {
-    glue(do_dcbz_64, MEMSUFFIX)();
+    glue(do_dcbz, MEMSUFFIX)();
     RETURN();
 }
 #endif
@@ -948,6 +818,7 @@
 /* Instruction cache block invalidate */
 void OPPROTO glue(op_icbi, MEMSUFFIX) (void)
 {
+    T0 = (uint32_t)T0;
     glue(do_icbi, MEMSUFFIX)();
     RETURN();
 }
@@ -955,7 +826,7 @@
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_icbi_64, MEMSUFFIX) (void)
 {
-    glue(do_icbi_64, MEMSUFFIX)();
+    glue(do_icbi, MEMSUFFIX)();
     RETURN();
 }
 #endif
@@ -963,14 +834,14 @@
 /* External access */
 void OPPROTO glue(op_eciwx, MEMSUFFIX) (void)
 {
-    T1 = glue(ldl, MEMSUFFIX)((uint32_t)T0);
+    T1 = glue(ldul, MEMSUFFIX)((uint32_t)T0);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_eciwx_64, MEMSUFFIX) (void)
 {
-    T1 = glue(ldl, MEMSUFFIX)((uint64_t)T0);
+    T1 = glue(ldul, MEMSUFFIX)((uint64_t)T0);
     RETURN();
 }
 #endif
@@ -991,28 +862,28 @@
 
 void OPPROTO glue(op_eciwx_le, MEMSUFFIX) (void)
 {
-    T1 = glue(ld32r, MEMSUFFIX)((uint32_t)T0);
+    T1 = glue(ldulr, MEMSUFFIX)((uint32_t)T0);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_eciwx_le_64, MEMSUFFIX) (void)
 {
-    T1 = glue(ld32r, MEMSUFFIX)((uint64_t)T0);
+    T1 = glue(ldulr, MEMSUFFIX)((uint64_t)T0);
     RETURN();
 }
 #endif
 
 void OPPROTO glue(op_ecowx_le, MEMSUFFIX) (void)
 {
-    glue(st32r, MEMSUFFIX)((uint32_t)T0, T1);
+    glue(stlr, MEMSUFFIX)((uint32_t)T0, T1);
     RETURN();
 }
 
 #if defined(TARGET_PPC64)
 void OPPROTO glue(op_ecowx_le_64, MEMSUFFIX) (void)
 {
-    glue(st32r, MEMSUFFIX)((uint64_t)T0, T1);
+    glue(stlr, MEMSUFFIX)((uint64_t)T0, T1);
     RETURN();
 }
 #endif
@@ -1070,8 +941,8 @@
 
 void OPPROTO glue(op_vr_lvx_le, MEMSUFFIX) (void)
 {
-    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint32_t)T0);
-    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint32_t)T0 + 8);
+    AVR0.u64[VR_DWORD1] = glue(ldqr, MEMSUFFIX)((uint32_t)T0);
+    AVR0.u64[VR_DWORD0] = glue(ldqr, MEMSUFFIX)((uint32_t)T0 + 8);
 }
 
 void OPPROTO glue(op_vr_stvx, MEMSUFFIX) (void)
@@ -1082,8 +953,8 @@
 
 void OPPROTO glue(op_vr_stvx_le, MEMSUFFIX) (void)
 {
-    glue(stq, MEMSUFFIX)((uint32_t)T0, AVR0.u64[VR_DWORD1]);
-    glue(stq, MEMSUFFIX)((uint32_t)T0 + 8, AVR0.u64[VR_DWORD0]);
+    glue(stqr, MEMSUFFIX)((uint32_t)T0, AVR0.u64[VR_DWORD1]);
+    glue(stqr, MEMSUFFIX)((uint32_t)T0 + 8, AVR0.u64[VR_DWORD0]);
 }
 
 #if defined(TARGET_PPC64)
@@ -1095,8 +966,8 @@
 
 void OPPROTO glue(op_vr_lvx_le_64, MEMSUFFIX) (void)
 {
-    AVR0.u64[VR_DWORD1] = glue(ldq, MEMSUFFIX)((uint64_t)T0);
-    AVR0.u64[VR_DWORD0] = glue(ldq, MEMSUFFIX)((uint64_t)T0 + 8);
+    AVR0.u64[VR_DWORD1] = glue(ldqr, MEMSUFFIX)((uint64_t)T0);
+    AVR0.u64[VR_DWORD0] = glue(ldqr, MEMSUFFIX)((uint64_t)T0 + 8);
 }
 
 void OPPROTO glue(op_vr_stvx_64, MEMSUFFIX) (void)
@@ -1107,8 +978,8 @@
 
 void OPPROTO glue(op_vr_stvx_le_64, MEMSUFFIX) (void)
 {
-    glue(stq, MEMSUFFIX)((uint64_t)T0, AVR0.u64[VR_DWORD1]);
-    glue(stq, MEMSUFFIX)((uint64_t)T0 + 8, AVR0.u64[VR_DWORD0]);
+    glue(stqr, MEMSUFFIX)((uint64_t)T0, AVR0.u64[VR_DWORD1]);
+    glue(stqr, MEMSUFFIX)((uint64_t)T0 + 8, AVR0.u64[VR_DWORD0]);
 }
 #endif
 #undef VR_DWORD0
@@ -1163,14 +1034,14 @@
 #if !defined(TARGET_PPC64)
 PPC_SPE_LD_OP(dd, ldq);
 PPC_SPE_ST_OP(dd, stq);
-PPC_SPE_LD_OP(dd_le, ld64r);
-PPC_SPE_ST_OP(dd_le, st64r);
+PPC_SPE_LD_OP(dd_le, ldqr);
+PPC_SPE_ST_OP(dd_le, stqr);
 #endif
 static always_inline uint64_t glue(spe_ldw, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ldl, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ldl, MEMSUFFIX)(EA + 4);
+    ret = (uint64_t)glue(ldul, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(ldul, MEMSUFFIX)(EA + 4);
     return ret;
 }
 PPC_SPE_LD_OP(dw, spe_ldw);
@@ -1184,16 +1055,16 @@
 static always_inline uint64_t glue(spe_ldw_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld32r, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ld32r, MEMSUFFIX)(EA + 4);
+    ret = (uint64_t)glue(ldulr, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(ldulr, MEMSUFFIX)(EA + 4);
     return ret;
 }
 PPC_SPE_LD_OP(dw_le, spe_ldw_le);
 static always_inline void glue(spe_stdw_le, MEMSUFFIX) (target_ulong EA,
                                                         uint64_t data)
 {
-    glue(st32r, MEMSUFFIX)(EA, data >> 32);
-    glue(st32r, MEMSUFFIX)(EA + 4, data);
+    glue(stlr, MEMSUFFIX)(EA, data >> 32);
+    glue(stlr, MEMSUFFIX)(EA + 4, data);
 }
 PPC_SPE_ST_OP(dw_le, spe_stdw_le);
 static always_inline uint64_t glue(spe_ldh, MEMSUFFIX) (target_ulong EA)
@@ -1218,20 +1089,20 @@
 static always_inline uint64_t glue(spe_ldh_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 48;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2) << 32;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 4) << 16;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 6);
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 48;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2) << 32;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 4) << 16;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 6);
     return ret;
 }
 PPC_SPE_LD_OP(dh_le, spe_ldh_le);
 static always_inline void glue(spe_stdh_le, MEMSUFFIX) (target_ulong EA,
                                                         uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 48);
-    glue(st16r, MEMSUFFIX)(EA + 2, data >> 32);
-    glue(st16r, MEMSUFFIX)(EA + 4, data >> 16);
-    glue(st16r, MEMSUFFIX)(EA + 6, data);
+    glue(stwr, MEMSUFFIX)(EA, data >> 48);
+    glue(stwr, MEMSUFFIX)(EA + 2, data >> 32);
+    glue(stwr, MEMSUFFIX)(EA + 4, data >> 16);
+    glue(stwr, MEMSUFFIX)(EA + 6, data);
 }
 PPC_SPE_ST_OP(dh_le, spe_stdh_le);
 static always_inline uint64_t glue(spe_lwhe, MEMSUFFIX) (target_ulong EA)
@@ -1252,16 +1123,16 @@
 static always_inline uint64_t glue(spe_lwhe_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 48;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2) << 16;
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 48;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2) << 16;
     return ret;
 }
 PPC_SPE_LD_OP(whe_le, spe_lwhe_le);
 static always_inline void glue(spe_stwhe_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 48);
-    glue(st16r, MEMSUFFIX)(EA + 2, data >> 16);
+    glue(stwr, MEMSUFFIX)(EA, data >> 48);
+    glue(stwr, MEMSUFFIX)(EA + 2, data >> 16);
 }
 PPC_SPE_ST_OP(whe_le, spe_stwhe_le);
 static always_inline uint64_t glue(spe_lwhou, MEMSUFFIX) (target_ulong EA)
@@ -1290,24 +1161,24 @@
 static always_inline uint64_t glue(spe_lwhou_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = (uint64_t)glue(ld16r, MEMSUFFIX)(EA) << 32;
-    ret |= (uint64_t)glue(ld16r, MEMSUFFIX)(EA + 2);
+    ret = (uint64_t)glue(lduwr, MEMSUFFIX)(EA) << 32;
+    ret |= (uint64_t)glue(lduwr, MEMSUFFIX)(EA + 2);
     return ret;
 }
 PPC_SPE_LD_OP(whou_le, spe_lwhou_le);
 static always_inline uint64_t glue(spe_lwhos_le, MEMSUFFIX) (target_ulong EA)
 {
     uint64_t ret;
-    ret = ((uint64_t)((int32_t)glue(ld16rs, MEMSUFFIX)(EA))) << 32;
-    ret |= (uint64_t)((int32_t)glue(ld16rs, MEMSUFFIX)(EA + 2));
+    ret = ((uint64_t)((int32_t)glue(ldswr, MEMSUFFIX)(EA))) << 32;
+    ret |= (uint64_t)((int32_t)glue(ldswr, MEMSUFFIX)(EA + 2));
     return ret;
 }
 PPC_SPE_LD_OP(whos_le, spe_lwhos_le);
 static always_inline void glue(spe_stwho_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st16r, MEMSUFFIX)(EA, data >> 32);
-    glue(st16r, MEMSUFFIX)(EA + 2, data);
+    glue(stwr, MEMSUFFIX)(EA, data >> 32);
+    glue(stwr, MEMSUFFIX)(EA + 2, data);
 }
 PPC_SPE_ST_OP(who_le, spe_stwho_le);
 #if !defined(TARGET_PPC64)
@@ -1320,7 +1191,7 @@
 static always_inline void glue(spe_stwwo_le, MEMSUFFIX) (target_ulong EA,
                                                          uint64_t data)
 {
-    glue(st32r, MEMSUFFIX)(EA, data);
+    glue(stlr, MEMSUFFIX)(EA, data);
 }
 PPC_SPE_ST_OP(wwo_le, spe_stwwo_le);
 #endif
@@ -1334,14 +1205,14 @@
 static always_inline uint64_t glue(spe_lh_le, MEMSUFFIX) (target_ulong EA)
 {
     uint16_t tmp;
-    tmp = glue(ld16r, MEMSUFFIX)(EA);
+    tmp = glue(lduwr, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 48) | ((uint64_t)tmp << 16);
 }
 PPC_SPE_LD_OP(h_le, spe_lh_le);
 static always_inline uint64_t glue(spe_lwwsplat, MEMSUFFIX) (target_ulong EA)
 {
     uint32_t tmp;
-    tmp = glue(ldl, MEMSUFFIX)(EA);
+    tmp = glue(ldul, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 32) | (uint64_t)tmp;
 }
 PPC_SPE_LD_OP(wwsplat, spe_lwwsplat);
@@ -1349,7 +1220,7 @@
 uint64_t glue(spe_lwwsplat_le, MEMSUFFIX) (target_ulong EA)
 {
     uint32_t tmp;
-    tmp = glue(ld32r, MEMSUFFIX)(EA);
+    tmp = glue(ldulr, MEMSUFFIX)(EA);
     return ((uint64_t)tmp << 32) | (uint64_t)tmp;
 }
 PPC_SPE_LD_OP(wwsplat_le, spe_lwwsplat_le);
@@ -1369,9 +1240,9 @@
 {
     uint64_t ret;
     uint16_t tmp;
-    tmp = glue(ld16r, MEMSUFFIX)(EA);
+    tmp = glue(lduwr, MEMSUFFIX)(EA);
     ret = ((uint64_t)tmp << 48) | ((uint64_t)tmp << 32);
-    tmp = glue(ld16r, MEMSUFFIX)(EA + 2);
+    tmp = glue(lduwr, MEMSUFFIX)(EA + 2);
     ret |= ((uint64_t)tmp << 16) | (uint64_t)tmp;
     return ret;
 }
Index: target-ppc/translate.c
===================================================================
--- target-ppc/translate.c.orig	2007-10-14 16:44:20.000000000 +0000
+++ target-ppc/translate.c	2007-10-15 15:41:10.000000000 +0000
@@ -6756,7 +6756,7 @@
                     ctx.nip, 1 - msr_pr, msr_ir);
         }
 #endif
-        ctx.opcode = ldl_code(ctx.nip);
+        ctx.opcode = ldul_code(ctx.nip);
         if (msr_le) {
             ctx.opcode = ((ctx.opcode & 0xFF000000) >> 24) |
                 ((ctx.opcode & 0x00FF0000) >> 8) |
Index: target-sh4/exec.h
===================================================================
--- target-sh4/exec.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-sh4/exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -48,6 +48,9 @@
 
 #ifndef CONFIG_USER_ONLY
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif
 
 #define RETURN() __asm__ __volatile__("")
Index: target-sh4/op_helper.c
===================================================================
--- target-sh4/op_helper.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-sh4/op_helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -30,6 +30,7 @@
 #define MMUSUFFIX _mmu
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -42,6 +43,21 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Reverse-endian */
+#define REVERSE_ENDIAN
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+#undef REVERSE_ENDIAN
+
 void tlb_fill(target_ulong addr, int is_write, int mmu_idx, void *retaddr)
 {
     TranslationBlock *tb;
Index: target-sh4/op_mem.c
===================================================================
--- target-sh4/op_mem.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-sh4/op_mem.c	2007-10-15 15:41:10.000000000 +0000
@@ -48,7 +48,7 @@
 }
 
 void glue(op_ldl_T0_T0, MEMSUFFIX) (void) {
-    T0 = glue(ldl, MEMSUFFIX) (T0);
+    T0 = glue(ldul, MEMSUFFIX) (T0);
     RETURN();
 }
 
Index: target-sparc/exec.h
===================================================================
--- target-sparc/exec.h.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-sparc/exec.h	2007-10-15 15:41:10.000000000 +0000
@@ -100,6 +100,9 @@
 /* XXX: move that to a generic header */
 #if !defined(CONFIG_USER_ONLY)
 #include "softmmu_exec.h"
+#define REVERSE_ENDIAN
+#include "softmmu_exec.h"
+#undef REVERSE_ENDIAN
 #endif /* !defined(CONFIG_USER_ONLY) */
 
 static inline void env_to_regs(void)
Index: target-sparc/helper.c
===================================================================
--- target-sparc/helper.c.orig	2007-10-14 13:00:06.000000000 +0000
+++ target-sparc/helper.c	2007-10-15 15:41:10.000000000 +0000
@@ -130,7 +130,7 @@
     /* SPARC reference MMU table walk: Context table->L1->L2->PTE */
     /* Context base + context number */
     pde_ptr = (env->mmuregs[1] << 4) + (env->mmuregs[2] << 2);
-    pde = ldl_phys(pde_ptr);
+    pde = ldul_phys(pde_ptr);
 
     /* Ctx pde */
     switch (pde & PTE_ENTRYTYPE_MASK) {
@@ -142,7 +142,7 @@
         return 4 << 2;
     case 1: /* L0 PDE */
         pde_ptr = ((address >> 22) & ~3) + ((pde & ~3) << 4);
-        pde = ldl_phys(pde_ptr);
+        pde = ldul_phys(pde_ptr);
 
         switch (pde & PTE_ENTRYTYPE_MASK) {
         default:
@@ -152,7 +152,7 @@
             return (1 << 8) | (4 << 2);
         case 1: /* L1 PDE */
             pde_ptr = ((address & 0xfc0000) >> 16) + ((pde & ~3) << 4);
-            pde = ldl_phys(pde_ptr);
+            pde = ldul_phys(pde_ptr);
 
             switch (pde & PTE_ENTRYTYPE_MASK) {
             default:
@@ -162,7 +162,7 @@
                 return (2 << 8) | (4 << 2);
             case 1: /* L2 PDE */
                 pde_ptr = ((address & 0x3f000) >> 10) + ((pde & ~3) << 4);
-                pde = ldl_phys(pde_ptr);
+                pde = ldul_phys(pde_ptr);
 
                 switch (pde & PTE_ENTRYTYPE_MASK) {
                 default:
@@ -266,7 +266,7 @@
     /* Context base + context number */
     pde_ptr = (target_phys_addr_t)(env->mmuregs[1] << 4) +
         (env->mmuregs[2] << 2);
-    pde = ldl_phys(pde_ptr);
+    pde = ldul_phys(pde_ptr);
 
     switch (pde & PTE_ENTRYTYPE_MASK) {
     default:
@@ -278,7 +278,7 @@
         if (mmulev == 3)
             return pde;
         pde_ptr = ((address >> 22) & ~3) + ((pde & ~3) << 4);
-        pde = ldl_phys(pde_ptr);
+        pde = ldul_phys(pde_ptr);
 
         switch (pde & PTE_ENTRYTYPE_MASK) {
         default:
@@ -291,7 +291,7 @@
             if (mmulev == 2)
                 return pde;
             pde_ptr = ((address & 0xfc0000) >> 16) + ((pde & ~3) << 4);
-            pde = ldl_phys(pde_ptr);
+            pde = ldul_phys(pde_ptr);
 
             switch (pde & PTE_ENTRYTYPE_MASK) {
             default:
@@ -304,7 +304,7 @@
                 if (mmulev == 1)
                     return pde;
                 pde_ptr = ((address & 0x3f000) >> 10) + ((pde & ~3) << 4);
-                pde = ldl_phys(pde_ptr);
+                pde = ldul_phys(pde_ptr);
 
                 switch (pde & PTE_ENTRYTYPE_MASK) {
                 default:
@@ -331,7 +331,7 @@
 
     printf("MMU dump:\n");
     pde_ptr = (env->mmuregs[1] << 4) + (env->mmuregs[2] << 2);
-    pde = ldl_phys(pde_ptr);
+    pde = ldul_phys(pde_ptr);
     printf("Root ptr: " TARGET_FMT_plx ", ctx: %d\n",
            (target_phys_addr_t)env->mmuregs[1] << 4, env->mmuregs[2]);
     for (n = 0, va = 0; n < 256; n++, va += 16 * 1024 * 1024) {
Index: target-sparc/op_helper.c
===================================================================
--- target-sparc/op_helper.c.orig	2007-10-15 15:40:22.000000000 +0000
+++ target-sparc/op_helper.c	2007-10-15 15:42:33.000000000 +0000
@@ -241,11 +241,11 @@
             break;
         default:
         case 4:
-            ret = ldl_code(T0 & ~3);
+            ret = ldul_code(T0 & ~3);
             break;
         case 8:
-            ret = ldl_code(T0 & ~3);
-            T0 = ldl_code((T0 + 4) & ~3);
+            ret = ldul_code(T0 & ~3);
+            T0 = ldul_code((T0 + 4) & ~3);
             break;
         }
         break;
@@ -259,11 +259,11 @@
             break;
         default:
         case 4:
-            ret = ldl_user(T0 & ~3);
+            ret = ldul_user(T0 & ~3);
             break;
         case 8:
-            ret = ldl_user(T0 & ~3);
-            T0 = ldl_user((T0 + 4) & ~3);
+            ret = ldul_user(T0 & ~3);
+            T0 = ldul_user((T0 + 4) & ~3);
             break;
         }
         break;
@@ -277,11 +277,11 @@
             break;
         default:
         case 4:
-            ret = ldl_kernel(T0 & ~3);
+            ret = ldul_kernel(T0 & ~3);
             break;
         case 8:
-            ret = ldl_kernel(T0 & ~3);
-            T0 = ldl_kernel((T0 + 4) & ~3);
+            ret = ldul_kernel(T0 & ~3);
+            T0 = ldul_kernel((T0 + 4) & ~3);
             break;
         }
         break;
@@ -300,11 +300,11 @@
             break;
         default:
         case 4:
-            ret = ldl_phys(T0 & ~3);
+            ret = ldul_phys(T0 & ~3);
             break;
         case 8:
-            ret = ldl_phys(T0 & ~3);
-            T0 = ldl_phys((T0 + 4) & ~3);
+            ret = ldul_phys(T0 & ~3);
+            T0 = ldul_phys((T0 + 4) & ~3);
             break;
         }
         break;
@@ -321,13 +321,13 @@
             break;
         default:
         case 4:
-            ret = ldl_phys((target_phys_addr_t)(T0 & ~3)
-                           | ((target_phys_addr_t)(asi & 0xf) << 32));
+            ret = ldul_phys((target_phys_addr_t)(T0 & ~3)
+                            | ((target_phys_addr_t)(asi & 0xf) << 32));
             break;
         case 8:
-            ret = ldl_phys((target_phys_addr_t)(T0 & ~3)
-                           | ((target_phys_addr_t)(asi & 0xf) << 32));
-            T0 = ldl_phys((target_phys_addr_t)((T0 + 4) & ~3)
+            ret = ldul_phys((target_phys_addr_t)(T0 & ~3)
+                            | ((target_phys_addr_t)(asi & 0xf) << 32));
+            T0 = ldul_phys((target_phys_addr_t)((T0 + 4) & ~3)
                            | ((target_phys_addr_t)(asi & 0xf) << 32));
             break;
         }
@@ -557,7 +557,7 @@
             uint32_t src = T1 & ~3, dst = T0 & ~3, temp;
 
             for (i = 0; i < 32; i += 4, src += 4, dst += 4) {
-                temp = ldl_kernel(src);
+                temp = ldul_kernel(src);
                 stl_kernel(dst, temp);
             }
         }
@@ -660,7 +660,7 @@
                 ret = lduw_raw(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_raw(T0 & ~3);
+                ret = ldul_raw(T0 & ~3);
                 break;
             default:
             case 8:
@@ -810,7 +810,7 @@
                     ret = lduw_hypv(T0 & ~1);
                     break;
                 case 4:
-                    ret = ldl_hypv(T0 & ~3);
+                    ret = ldul_hypv(T0 & ~3);
                     break;
                 default:
                 case 8:
@@ -826,7 +826,7 @@
                     ret = lduw_kernel(T0 & ~1);
                     break;
                 case 4:
-                    ret = ldl_kernel(T0 & ~3);
+                    ret = ldul_kernel(T0 & ~3);
                     break;
                 default:
                 case 8:
@@ -843,7 +843,7 @@
                 ret = lduw_user(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_user(T0 & ~3);
+                ret = ldul_user(T0 & ~3);
                 break;
             default:
             case 8:
@@ -865,7 +865,7 @@
                 ret = lduw_phys(T0 & ~1);
                 break;
             case 4:
-                ret = ldl_phys(T0 & ~3);
+                ret = ldul_phys(T0 & ~3);
                 break;
             default:
             case 8:
@@ -1670,6 +1670,21 @@
 #define ALIGNED_ONLY
 #define GETPC() (__builtin_return_address(0))
 
+/* Native-endian */
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* Reverse-endian */
+#define REVERSE_ENDIAN
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -1681,6 +1696,7 @@
 
 #define SHIFT 3
 #include "softmmu_template.h"
+#undef REVERSE_ENDIAN
 
 static void do_unaligned_access(target_ulong addr, int is_write, int is_user,
                                 void *retaddr)
Index: target-sparc/op_mem.h
===================================================================
--- target-sparc/op_mem.h.orig	2007-10-14 20:29:26.000000000 +0000
+++ target-sparc/op_mem.h	2007-10-15 15:59:17.000000000 +0000
@@ -23,7 +23,7 @@
     glue(op, MEMSUFFIX)(ADDR(T0), T1);                                      \
 }
 
-SPARC_LD_OP(ld, ldl);
+SPARC_LD_OP(ld, ldul);
 SPARC_LD_OP(ldub, ldub);
 SPARC_LD_OP(lduh, lduw);
 SPARC_LD_OP_S(ldsb, ldsb);
@@ -48,15 +48,15 @@
 
 void OPPROTO glue(op_swap, MEMSUFFIX)(void)
 {
-    target_ulong tmp = glue(ldl, MEMSUFFIX)(ADDR(T0));
+    target_ulong tmp = glue(ldul, MEMSUFFIX)(ADDR(T0));
     glue(stl, MEMSUFFIX)(ADDR(T0), T1);       /* XXX: Should be Atomically */
     T1 = tmp;
 }
 
 void OPPROTO glue(op_ldd, MEMSUFFIX)(void)
 {
-    T1 = glue(ldl, MEMSUFFIX)(ADDR(T0));
-    T0 = glue(ldl, MEMSUFFIX)((ADDR(T0 + 4)));
+    T1 = glue(ldul, MEMSUFFIX)(ADDR(T0));
+    T0 = glue(ldul, MEMSUFFIX)((ADDR(T0 + 4)));
 }
 
 /***                         Floating-point store                          ***/
@@ -84,12 +84,12 @@
 #ifdef TARGET_SPARC64
 void OPPROTO glue(op_lduw, MEMSUFFIX)(void)
 {
-    T1 = (uint64_t)(glue(ldl, MEMSUFFIX)(ADDR(T0)) & 0xffffffff);
+    T1 = (uint64_t)(glue(ldul, MEMSUFFIX)(ADDR(T0)) & 0xffffffff);
 }
 
 void OPPROTO glue(op_ldsw, MEMSUFFIX)(void)
 {
-    T1 = (int64_t)(glue(ldl, MEMSUFFIX)(ADDR(T0)) & 0xffffffff);
+    T1 = (int64_t)(glue(ldul, MEMSUFFIX)(ADDR(T0)) & 0xffffffff);
 }
 
 SPARC_LD_OP(ldx, ldq);
Index: target-sparc/translate.c
===================================================================
--- target-sparc/translate.c.orig	2007-10-14 16:47:11.000000000 +0000
+++ target-sparc/translate.c	2007-10-15 15:41:10.000000000 +0000
@@ -1096,7 +1096,7 @@
 {
     unsigned int insn, opc, rs1, rs2, rd;
 
-    insn = ldl_code(dc->pc);
+    insn = ldul_code(dc->pc);
     opc = GET_FIELD(insn, 0, 1);
 
     rd = GET_FIELD(insn, 2, 6);

  reply	other threads:[~2007-10-15 16:02 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-10-14 11:49 [Qemu-devel] RFC: reverse-endian softmmu memory accessors J. Mayer
2007-10-14 12:59 ` Blue Swirl
2007-10-15 12:10   ` J. Mayer
2007-10-15 16:02     ` Blue Swirl [this message]
2007-10-15 17:45       ` Blue Swirl
2007-10-16 20:27         ` J. Mayer
2007-11-23 12:55           ` Tero Kaarlela
2007-10-15 21:06       ` J. Mayer
  -- strict thread matches above, loose matches on Subject: below --
2007-10-13  9:56 J. Mayer
2007-10-13 10:47 ` Blue Swirl
2007-10-13 12:43   ` J. Mayer
2007-10-13 13:07     ` Blue Swirl
2007-10-13 14:17       ` J. Mayer
2007-10-13 22:07         ` J. Mayer
2007-10-13 22:53           ` Thiemo Seufer
2007-10-14  8:19           ` Blue Swirl
2007-10-14 10:14             ` J. Mayer
2007-10-14 13:22               ` Thiemo Seufer
2007-10-15 11:55                 ` J. Mayer
2007-10-13 13:02   ` Thiemo Seufer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f43fc5580710150902l39848603q95b36c9f734295f1@mail.gmail.com \
    --to=blauwirbel@gmail.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.