* [Qemu-devel] [PATCH 0/5] tcg: Make bswap support in qemu_ld/st optional
@ 2018-11-20 12:15 Richard Henderson
  2018-11-20 12:15 ` [Qemu-devel] [PATCH 1/5] tcg: Add TCG_TARGET_HAS_MEMORY_BSWAP Richard Henderson
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Richard Henderson @ 2018-11-20 12:15 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alistair.Francis

Having to support bswap in the guest memory operations at times
requires an irritating number of temporaries.  It is better to
allow the bswap operations to be expanded outside of the memory
ops and let register allocation do its thing.

This is something that I've been planning to do for some time,
to aid i386 and arm32 especially.  But I've been prompted again
by the nascent risc-v backend, for which having to open-code this
would be especially irritating.

Alistair, for convenience I've pushed this to 

  https://github.com/rth7680/qemu.git tcg-next-for-4.0


r~


Richard Henderson (5):
  tcg: Add TCG_TARGET_HAS_MEMORY_BSWAP
  tcg/optimize: Optimize bswap
  tcg/i386: Set TCG_TARGET_HAS_MEMORY_BSWAP with have_movbe
  tcg/aarch64: Set TCG_TARGET_HAS_MEMORY_BSWAP to false
  tcg/arm: Set TCG_TARGET_HAS_MEMORY_BSWAP to false

 tcg/aarch64/tcg-target.h     |   1 +
 tcg/arm/tcg-target.h         |   1 +
 tcg/i386/tcg-target.h        |   3 +
 tcg/mips/tcg-target.h        |   1 +
 tcg/ppc/tcg-target.h         |   1 +
 tcg/s390/tcg-target.h        |   1 +
 tcg/sparc/tcg-target.h       |   1 +
 tcg/tci/tcg-target.h         |   2 +
 tcg/aarch64/tcg-target.inc.c |  63 +++--------
 tcg/arm/tcg-target.inc.c     | 198 +++++++++--------------------------
 tcg/i386/tcg-target.inc.c    | 112 +++++++-------------
 tcg/optimize.c               |  12 +++
 tcg/tcg-op.c                 | 118 ++++++++++++++++++++-
 13 files changed, 234 insertions(+), 280 deletions(-)

-- 
2.17.2


* [Qemu-devel] [PATCH 1/5] tcg: Add TCG_TARGET_HAS_MEMORY_BSWAP
  2018-11-20 12:15 [Qemu-devel] [PATCH 0/5] tcg: Make bswap support in qemu_ld/st optional Richard Henderson
@ 2018-11-20 12:15 ` Richard Henderson
  2018-11-20 12:15 ` [Qemu-devel] [PATCH 2/5] tcg/optimize: Optimize bswap Richard Henderson
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2018-11-20 12:15 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alistair.Francis

For now, this is defined universally as true, since we previously
required backends to implement swapped memory operations.  Future
patches may now remove that support where it is onerous.
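
For a backend that later opts out (as the aarch64 and arm patches in
this series do), the shape is roughly:

    /* In the backend's tcg-target.h: */
    #define TCG_TARGET_HAS_MEMORY_BSWAP     0

    /* In its tcg_out_qemu_ld/st, byte-swapped ops no longer arrive: */
    tcg_debug_assert(!(memop & MO_BSWAP));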

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.h |   1 +
 tcg/arm/tcg-target.h     |   1 +
 tcg/i386/tcg-target.h    |   2 +
 tcg/mips/tcg-target.h    |   1 +
 tcg/ppc/tcg-target.h     |   1 +
 tcg/s390/tcg-target.h    |   1 +
 tcg/sparc/tcg-target.h   |   1 +
 tcg/tci/tcg-target.h     |   2 +
 tcg/tcg-op.c             | 118 ++++++++++++++++++++++++++++++++++++++-
 9 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 9aea1d1771..f966a4fcb3 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -137,6 +137,7 @@ typedef enum {
 #define TCG_TARGET_HAS_mul_vec          1
 
 #define TCG_TARGET_DEFAULT_MO (0)
+#define TCG_TARGET_HAS_MEMORY_BSWAP     1
 
 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 94b3578c55..16172f73a3 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -131,6 +131,7 @@ enum {
 };
 
 #define TCG_TARGET_DEFAULT_MO (0)
+#define TCG_TARGET_HAS_MEMORY_BSWAP     1
 
 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 2441658865..b1b861f8f2 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -219,6 +219,8 @@ static inline void tb_target_set_jmp_target(uintptr_t tc_ptr,
 
 #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
 
+#define TCG_TARGET_HAS_MEMORY_BSWAP  1
+
 #ifdef CONFIG_SOFTMMU
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index a8222476f0..5cb8672470 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -203,6 +203,7 @@ extern bool use_mips32r2_instructions;
 #endif
 
 #define TCG_TARGET_DEFAULT_MO (0)
+#define TCG_TARGET_HAS_MEMORY_BSWAP     1
 
 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index be52ad1d2e..52c1bb04b1 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -128,6 +128,7 @@ void flush_icache_range(uintptr_t start, uintptr_t stop);
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
 
 #define TCG_TARGET_DEFAULT_MO (0)
+#define TCG_TARGET_HAS_MEMORY_BSWAP     1
 
 #ifdef CONFIG_SOFTMMU
 #define TCG_TARGET_NEED_LDST_LABELS
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 6f2b06a7d1..853ed6e7aa 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -135,6 +135,7 @@ extern uint64_t s390_facilities;
 #define TCG_TARGET_CALL_STACK_OFFSET	160
 
 #define TCG_TARGET_EXTEND_ARGS 1
+#define TCG_TARGET_HAS_MEMORY_BSWAP   1
 
 #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
 
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index d8339bf010..a0ed2a3342 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -164,6 +164,7 @@ extern bool use_vis3_instructions;
 #define TCG_AREG0 TCG_REG_I0
 
 #define TCG_TARGET_DEFAULT_MO (0)
+#define TCG_TARGET_HAS_MEMORY_BSWAP     1
 
 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 26140d78cb..086f34e69a 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -198,6 +198,8 @@ static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
    We prefer consistency across hosts on this.  */
 #define TCG_TARGET_DEFAULT_MO  (0)
 
+#define TCG_TARGET_HAS_MEMORY_BSWAP     1
+
 static inline void tb_target_set_jmp_target(uintptr_t tc_ptr,
                                             uintptr_t jmp_addr, uintptr_t addr)
 {
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 7a8015c5a9..56e36de7ce 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -2701,25 +2701,78 @@ static void tcg_gen_req_mo(TCGBar type)
 
 void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
+    TCGMemOp orig_memop;
+
     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     memop = tcg_canonicalize_memop(memop, 0, 0);
     trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
                                addr, trace_mem_get_info(memop, 0));
+
+    orig_memop = memop;
+    if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
+        memop &= ~MO_BSWAP;
+        /* The bswap primitive requires zero-extended input.  */
+        if ((memop & MO_SSIZE) == MO_SW) {
+            memop &= ~MO_SIGN;
+        }
+    }
+
     gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
+
+    if ((orig_memop ^ memop) & MO_BSWAP) {
+        switch (orig_memop & MO_SIZE) {
+        case MO_16:
+            tcg_gen_bswap16_i32(val, val);
+            if (orig_memop & MO_SIGN) {
+                tcg_gen_ext16s_i32(val, val);
+            }
+            break;
+        case MO_32:
+            tcg_gen_bswap32_i32(val, val);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    }
 }
 
 void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
+    TCGv_i32 swap = NULL;
+
     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     memop = tcg_canonicalize_memop(memop, 0, 1);
     trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
                                addr, trace_mem_get_info(memop, 1));
+
+    if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
+        swap = tcg_temp_new_i32();
+        switch (memop & MO_SIZE) {
+        case MO_16:
+            tcg_gen_ext16u_i32(swap, val);
+            tcg_gen_bswap16_i32(swap, swap);
+            break;
+        case MO_32:
+            tcg_gen_bswap32_i32(swap, val);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        val = swap;
+        memop &= ~MO_BSWAP;
+    }
+
     gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
+
+    if (swap) {
+        tcg_temp_free_i32(swap);
+    }
 }
 
 void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
-    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+    TCGMemOp orig_memop;
+
     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
         tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
         if (memop & MO_SIGN) {
@@ -2730,24 +2783,85 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
         return;
     }
 
+    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     memop = tcg_canonicalize_memop(memop, 1, 0);
     trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
                                addr, trace_mem_get_info(memop, 0));
+
+    orig_memop = memop;
+    if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
+        memop &= ~MO_BSWAP;
+        /* The bswap primitive requires zero-extended input.  */
+        if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
+            memop &= ~MO_SIGN;
+        }
+    }
+
     gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
+
+    if ((orig_memop ^ memop) & MO_BSWAP) {
+        switch (orig_memop & MO_SIZE) {
+        case MO_16:
+            tcg_gen_bswap16_i64(val, val);
+            if (orig_memop & MO_SIGN) {
+                tcg_gen_ext16s_i64(val, val);
+            }
+            break;
+        case MO_32:
+            tcg_gen_bswap32_i64(val, val);
+            if (orig_memop & MO_SIGN) {
+                tcg_gen_ext32s_i64(val, val);
+            }
+            break;
+        case MO_64:
+            tcg_gen_bswap64_i64(val, val);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    }
 }
 
 void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
-    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
+    TCGv_i64 swap = NULL;
+
     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
         tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
         return;
     }
 
+    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     memop = tcg_canonicalize_memop(memop, 1, 1);
     trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
                                addr, trace_mem_get_info(memop, 1));
+
+    if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
+        swap = tcg_temp_new_i64();
+        switch (memop & MO_SIZE) {
+        case MO_16:
+            tcg_gen_ext16u_i64(swap, val);
+            tcg_gen_bswap16_i64(swap, swap);
+            break;
+        case MO_32:
+            tcg_gen_ext32u_i64(swap, val);
+            tcg_gen_bswap32_i64(swap, swap);
+            break;
+        case MO_64:
+            tcg_gen_bswap64_i64(swap, val);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        val = swap;
+        memop &= ~MO_BSWAP;
+    }
+
     gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
+
+    if (swap) {
+        tcg_temp_free_i64(swap);
+    }
 }
 
 static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, TCGMemOp opc)
-- 
2.17.2


* [Qemu-devel] [PATCH 2/5] tcg/optimize: Optimize bswap
  2018-11-20 12:15 [Qemu-devel] [PATCH 0/5] tcg: Make bswap support in qemu_ld/st optional Richard Henderson
  2018-11-20 12:15 ` [Qemu-devel] [PATCH 1/5] tcg: Add TCG_TARGET_HAS_MEMORY_BSWAP Richard Henderson
@ 2018-11-20 12:15 ` Richard Henderson
  2018-11-20 12:15 ` [Qemu-devel] [PATCH 3/5] tcg/i386: Set TCG_TARGET_HAS_MEMORY_BSWAP with have_movbe Richard Henderson
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2018-11-20 12:15 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alistair.Francis

Somehow we forgot these operations, once upon a time.
This will allow immediate stores to have their bswap
optimized away.
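
As an illustration (not part of the patch), the folding itself is
just computing the swapped constant at translation time; QEMU's
bswap32() from qemu/bswap.h is functionally equivalent to:

    #include <stdint.h>

    static inline uint32_t fold_bswap32(uint32_t x)
    {
        return ((x & 0x000000ffu) << 24) | ((x & 0x0000ff00u) << 8) |
               ((x & 0x00ff0000u) >> 8)  | ((x & 0xff000000u) >> 24);
    }

so e.g. fold_bswap32(0x12345678) == 0x78563412, and an immediate
store through a byte-swapping memory op needs no run-time swap.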

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 5dbe11c3c8..6b98ec13e6 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -353,6 +353,15 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
     CASE_OP_32_64(ext16u):
         return (uint16_t)x;
 
+    CASE_OP_32_64(bswap16):
+        return bswap16(x);
+
+    CASE_OP_32_64(bswap32):
+        return bswap32(x);
+
+    case INDEX_op_bswap64_i64:
+        return bswap64(x);
+
     case INDEX_op_ext_i32_i64:
     case INDEX_op_ext32s_i64:
         return (int32_t)x;
@@ -1105,6 +1114,9 @@ void tcg_optimize(TCGContext *s)
         CASE_OP_32_64(ext16s):
         CASE_OP_32_64(ext16u):
         CASE_OP_32_64(ctpop):
+        CASE_OP_32_64(bswap16):
+        CASE_OP_32_64(bswap32):
+        case INDEX_op_bswap64_i64:
         case INDEX_op_ext32s_i64:
         case INDEX_op_ext32u_i64:
         case INDEX_op_ext_i32_i64:
-- 
2.17.2


* [Qemu-devel] [PATCH 3/5] tcg/i386: Set TCG_TARGET_HAS_MEMORY_BSWAP with have_movbe
  2018-11-20 12:15 [Qemu-devel] [PATCH 0/5] tcg: Make bswap support in qemu_ld/st optional Richard Henderson
  2018-11-20 12:15 ` [Qemu-devel] [PATCH 1/5] tcg: Add TCG_TARGET_HAS_MEMORY_BSWAP Richard Henderson
  2018-11-20 12:15 ` [Qemu-devel] [PATCH 2/5] tcg/optimize: Optimize bswap Richard Henderson
@ 2018-11-20 12:15 ` Richard Henderson
  2018-11-20 12:15 ` [Qemu-devel] [PATCH 4/5] tcg/aarch64: Set TCG_TARGET_HAS_MEMORY_BSWAP to false Richard Henderson
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2018-11-20 12:15 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alistair.Francis

This allows us to remove some code from the backend, leaving
the generic code to emit any extra bswaps.

This does not quite allow all of the cleanup that should be
possible, as we must still take care of i386 storing bytes
from non-'q' registers.
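
For context, a minimal sketch (illustration only, not QEMU code) of
the operation that movbe performs in a single instruction; without
movbe, the load and the swap now arrive from the generic code as two
separate ops instead:

    #include <stdint.h>
    #include <string.h>

    /* A 32-bit big-endian load on a little-endian host: a plain load
       followed by a byte swap.  MOVBE fuses these two steps. */
    static inline uint32_t load32_be(const void *p)
    {
        uint32_t v;
        memcpy(&v, p, 4);
        return __builtin_bswap32(v);
    }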

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.h     |   3 +-
 tcg/i386/tcg-target.inc.c | 112 ++++++++++++--------------------------
 2 files changed, 37 insertions(+), 78 deletions(-)

diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index b1b861f8f2..ed2d5d4441 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -101,6 +101,7 @@ extern bool have_bmi1;
 extern bool have_popcnt;
 extern bool have_avx1;
 extern bool have_avx2;
+extern bool have_movbe;
 
 /* optional instructions */
 #define TCG_TARGET_HAS_div2_i32         1
@@ -219,7 +220,7 @@ static inline void tb_target_set_jmp_target(uintptr_t tc_ptr,
 
 #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
 
-#define TCG_TARGET_HAS_MEMORY_BSWAP  1
+#define TCG_TARGET_HAS_MEMORY_BSWAP  have_movbe
 
 #ifdef CONFIG_SOFTMMU
 #define TCG_TARGET_NEED_LDST_LABELS
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 436195894b..14e1cf9eee 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -154,13 +154,12 @@ bool have_bmi1;
 bool have_popcnt;
 bool have_avx1;
 bool have_avx2;
+bool have_movbe;
 
 #ifdef CONFIG_CPUID_H
-static bool have_movbe;
 static bool have_bmi2;
 static bool have_lzcnt;
 #else
-# define have_movbe 0
 # define have_bmi2 0
 # define have_lzcnt 0
 #endif
@@ -1884,12 +1883,11 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                    TCGReg base, int index, intptr_t ofs,
                                    int seg, TCGMemOp memop)
 {
-    const TCGMemOp real_bswap = memop & MO_BSWAP;
-    TCGMemOp bswap = real_bswap;
+    bool need_bswap = memop & MO_BSWAP;
     int movop = OPC_MOVL_GvEv;
 
-    if (have_movbe && real_bswap) {
-        bswap = 0;
+    if (need_bswap) {
+        tcg_debug_assert(have_movbe);
         movop = OPC_MOVBE_GyMy;
     }
 
@@ -1903,46 +1901,41 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                  base, index, 0, ofs);
         break;
     case MO_UW:
-        tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
+        if (!need_bswap) {
+            tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
                                  base, index, 0, ofs);
-        if (real_bswap) {
-            tcg_out_rolw_8(s, datalo);
+        } else if (datalo != base && datalo != index) {
+            tcg_out_movi(s, TCG_TYPE_I32, datalo, 0);
+            tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
+                                     datalo, base, index, 0, ofs);
+        } else {
+            tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
+                                     datalo, base, index, 0, ofs);
+            tcg_out_ext16u(s, datalo, datalo);
         }
         break;
     case MO_SW:
-        if (real_bswap) {
-            if (have_movbe) {
-                tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
-                                         datalo, base, index, 0, ofs);
-            } else {
-                tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
-                                         base, index, 0, ofs);
-                tcg_out_rolw_8(s, datalo);
-            }
-            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
-        } else {
+        if (!need_bswap) {
             tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
                                      datalo, base, index, 0, ofs);
+        } else {
+            tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
+                                     datalo, base, index, 0, ofs);
+            tcg_out_ext16s(s, datalo, datalo, P_REXW);
         }
         break;
     case MO_UL:
         tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
-        if (bswap) {
-            tcg_out_bswap32(s, datalo);
-        }
         break;
 #if TCG_TARGET_REG_BITS == 64
     case MO_SL:
-        if (real_bswap) {
-            tcg_out_modrm_sib_offset(s, movop + seg, datalo,
-                                     base, index, 0, ofs);
-            if (bswap) {
-                tcg_out_bswap32(s, datalo);
-            }
-            tcg_out_ext32s(s, datalo, datalo);
-        } else {
+        if (!need_bswap) {
             tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
                                      base, index, 0, ofs);
+        } else {
+            tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + seg, datalo,
+                                     base, index, 0, ofs);
+            tcg_out_ext32s(s, datalo, datalo);
         }
         break;
 #endif
@@ -1950,12 +1943,9 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
         if (TCG_TARGET_REG_BITS == 64) {
             tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
                                      base, index, 0, ofs);
-            if (bswap) {
-                tcg_out_bswap64(s, datalo);
-            }
         } else {
-            if (real_bswap) {
-                int t = datalo;
+            if (need_bswap) {
+                TCGReg t = datalo;
                 datalo = datahi;
                 datahi = t;
             }
@@ -1970,14 +1960,10 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                          base, index, 0, ofs);
             }
-            if (bswap) {
-                tcg_out_bswap32(s, datalo);
-                tcg_out_bswap32(s, datahi);
-            }
         }
         break;
     default:
-        tcg_abort();
+        g_assert_not_reached();
     }
 }
 
@@ -2053,17 +2039,11 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                    TCGReg base, intptr_t ofs, int seg,
                                    TCGMemOp memop)
 {
-    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
-       we could perform the bswap twice to restore the original value
-       instead of moving to the scratch.  But as it is, the L constraint
-       means that TCG_REG_L0 is definitely free here.  */
-    const TCGReg scratch = TCG_REG_L0;
-    const TCGMemOp real_bswap = memop & MO_BSWAP;
-    TCGMemOp bswap = real_bswap;
+    bool need_bswap = memop & MO_BSWAP;
     int movop = OPC_MOVL_EvGv;
 
-    if (have_movbe && real_bswap) {
-        bswap = 0;
+    if (need_bswap) {
+        tcg_debug_assert(have_movbe);
         movop = OPC_MOVBE_MyGy;
     }
 
@@ -2072,46 +2052,24 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
         /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
            Use the scratch register if necessary.  */
         if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
-            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
-            datalo = scratch;
+            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_L0, datalo);
+            datalo = TCG_REG_L0;
         }
         tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
                              datalo, base, ofs);
         break;
     case MO_16:
-        if (bswap) {
-            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
-            tcg_out_rolw_8(s, scratch);
-            datalo = scratch;
-        }
         tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
         break;
     case MO_32:
-        if (bswap) {
-            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
-            tcg_out_bswap32(s, scratch);
-            datalo = scratch;
-        }
         tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
         break;
     case MO_64:
         if (TCG_TARGET_REG_BITS == 64) {
-            if (bswap) {
-                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
-                tcg_out_bswap64(s, scratch);
-                datalo = scratch;
-            }
             tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
-        } else if (bswap) {
-            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
-            tcg_out_bswap32(s, scratch);
-            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
-            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
-            tcg_out_bswap32(s, scratch);
-            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
         } else {
-            if (real_bswap) {
-                int t = datalo;
+            if (need_bswap) {
+                TCGReg t = datalo;
                 datalo = datahi;
                 datahi = t;
             }
@@ -2120,7 +2078,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
         }
         break;
     default:
-        tcg_abort();
+        g_assert_not_reached();
     }
 }
 
-- 
2.17.2


* [Qemu-devel] [PATCH 4/5] tcg/aarch64: Set TCG_TARGET_HAS_MEMORY_BSWAP to false
  2018-11-20 12:15 [Qemu-devel] [PATCH 0/5] tcg: Make bswap support in qemu_ld/st optional Richard Henderson
                   ` (2 preceding siblings ...)
  2018-11-20 12:15 ` [Qemu-devel] [PATCH 3/5] tcg/i386: Set TCG_TARGET_HAS_MEMORY_BSWAP with have_movbe Richard Henderson
@ 2018-11-20 12:15 ` Richard Henderson
  2018-11-20 12:15 ` [Qemu-devel] [PATCH 5/5] tcg/arm: " Richard Henderson
  2018-11-22  1:11 ` [Qemu-devel] [PATCH 0/5] tcg: Make bswap support in qemu_ld/st optional no-reply
  5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2018-11-20 12:15 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alistair.Francis

This allows us to remove some code from the backend, leaving
the generic code to emit any extra bswaps.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.h     |  2 +-
 tcg/aarch64/tcg-target.inc.c | 63 ++++++++----------------------------
 2 files changed, 14 insertions(+), 51 deletions(-)

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index f966a4fcb3..ef9e9084b2 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -137,7 +137,7 @@ typedef enum {
 #define TCG_TARGET_HAS_mul_vec          1
 
 #define TCG_TARGET_DEFAULT_MO (0)
-#define TCG_TARGET_HAS_MEMORY_BSWAP     1
+#define TCG_TARGET_HAS_MEMORY_BSWAP     0
 
 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 083592a4d7..784c31d783 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -1383,9 +1383,6 @@ static void * const qemu_ld_helpers[16] = {
     [MO_LEUW] = helper_le_lduw_mmu,
     [MO_LEUL] = helper_le_ldul_mmu,
     [MO_LEQ]  = helper_le_ldq_mmu,
-    [MO_BEUW] = helper_be_lduw_mmu,
-    [MO_BEUL] = helper_be_ldul_mmu,
-    [MO_BEQ]  = helper_be_ldq_mmu,
 };
 
 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
@@ -1397,9 +1394,6 @@ static void * const qemu_st_helpers[16] = {
     [MO_LEUW] = helper_le_stw_mmu,
     [MO_LEUL] = helper_le_stl_mmu,
     [MO_LEQ]  = helper_le_stq_mmu,
-    [MO_BEUW] = helper_be_stw_mmu,
-    [MO_BEUL] = helper_be_stl_mmu,
-    [MO_BEQ]  = helper_be_stq_mmu,
 };
 
 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
@@ -1421,7 +1415,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
     tcg_out_adr(s, TCG_REG_X3, lb->raddr);
-    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+    tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]);
     if (opc & MO_SIGN) {
         tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
     } else {
@@ -1444,7 +1438,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
     tcg_out_adr(s, TCG_REG_X4, lb->raddr);
-    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+    tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]);
     tcg_out_goto(s, lb->raddr);
 }
 
@@ -1544,8 +1538,6 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
                                    TCGReg data_r, TCGReg addr_r,
                                    TCGType otype, TCGReg off_r)
 {
-    const TCGMemOp bswap = memop & MO_BSWAP;
-
     switch (memop & MO_SSIZE) {
     case MO_UB:
         tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
@@ -1556,43 +1548,22 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
         break;
     case MO_UW:
         tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
-        if (bswap) {
-            tcg_out_rev16(s, data_r, data_r);
-        }
         break;
     case MO_SW:
-        if (bswap) {
-            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
-            tcg_out_rev16(s, data_r, data_r);
-            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
-        } else {
-            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
-                           data_r, addr_r, otype, off_r);
-        }
+        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
+                       data_r, addr_r, otype, off_r);
         break;
     case MO_UL:
         tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
-        if (bswap) {
-            tcg_out_rev32(s, data_r, data_r);
-        }
         break;
     case MO_SL:
-        if (bswap) {
-            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
-            tcg_out_rev32(s, data_r, data_r);
-            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
-        } else {
-            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
-        }
+        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
         break;
     case MO_Q:
         tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
-        if (bswap) {
-            tcg_out_rev64(s, data_r, data_r);
-        }
         break;
     default:
-        tcg_abort();
+        g_assert_not_reached();
     }
 }
 
@@ -1600,35 +1571,21 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
                                    TCGReg data_r, TCGReg addr_r,
                                    TCGType otype, TCGReg off_r)
 {
-    const TCGMemOp bswap = memop & MO_BSWAP;
-
     switch (memop & MO_SIZE) {
     case MO_8:
         tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
         break;
     case MO_16:
-        if (bswap && data_r != TCG_REG_XZR) {
-            tcg_out_rev16(s, TCG_REG_TMP, data_r);
-            data_r = TCG_REG_TMP;
-        }
         tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
         break;
     case MO_32:
-        if (bswap && data_r != TCG_REG_XZR) {
-            tcg_out_rev32(s, TCG_REG_TMP, data_r);
-            data_r = TCG_REG_TMP;
-        }
         tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
         break;
     case MO_64:
-        if (bswap && data_r != TCG_REG_XZR) {
-            tcg_out_rev64(s, TCG_REG_TMP, data_r);
-            data_r = TCG_REG_TMP;
-        }
         tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
         break;
     default:
-        tcg_abort();
+        g_assert_not_reached();
     }
 }
 
@@ -1637,6 +1594,9 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
 {
     TCGMemOp memop = get_memop(oi);
     const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
+
+    tcg_debug_assert(!(memop & MO_BSWAP));
+
 #ifdef CONFIG_SOFTMMU
     unsigned mem_index = get_mmuidx(oi);
     tcg_insn_unit *label_ptr;
@@ -1662,6 +1622,9 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
 {
     TCGMemOp memop = get_memop(oi);
     const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
+
+    tcg_debug_assert(!(memop & MO_BSWAP));
+
 #ifdef CONFIG_SOFTMMU
     unsigned mem_index = get_mmuidx(oi);
     tcg_insn_unit *label_ptr;
-- 
2.17.2


* [Qemu-devel] [PATCH 5/5] tcg/arm: Set TCG_TARGET_HAS_MEMORY_BSWAP to false
  2018-11-20 12:15 [Qemu-devel] [PATCH 0/5] tcg: Make bswap support in qemu_ld/st optional Richard Henderson
                   ` (3 preceding siblings ...)
  2018-11-20 12:15 ` [Qemu-devel] [PATCH 4/5] tcg/aarch64: Set TCG_TARGET_HAS_MEMORY_BSWAP to false Richard Henderson
@ 2018-11-20 12:15 ` Richard Henderson
  2018-11-22  1:11 ` [Qemu-devel] [PATCH 0/5] tcg: Make bswap support in qemu_ld/st optional no-reply
  5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2018-11-20 12:15 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alistair.Francis

This allows us to remove some code from the backend, leaving
the generic code to emit any extra bswaps.  It also lets us
avoid reserving an extra register for CONFIG_USER_ONLY.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/arm/tcg-target.h     |   2 +-
 tcg/arm/tcg-target.inc.c | 198 ++++++++++-----------------------------
 2 files changed, 48 insertions(+), 152 deletions(-)

diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 16172f73a3..a6f7be5483 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -131,7 +131,7 @@ enum {
 };
 
 #define TCG_TARGET_DEFAULT_MO (0)
-#define TCG_TARGET_HAS_MEMORY_BSWAP     1
+#define TCG_TARGET_HAS_MEMORY_BSWAP     0
 
 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index e1fbf465cb..0043454d6b 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -285,11 +285,10 @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
     case 's':
         ct->ct |= TCG_CT_REG;
         ct->u.regs = 0xffff;
-        /* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
-           and r0-r1 doing the byte swapping, so don't use these. */
+#if defined(CONFIG_SOFTMMU)
+        /* r0-r2 will be overwritten when reading the tlb entry.  */
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
-#if defined(CONFIG_SOFTMMU)
         /* Avoid clashes with registers being used for helper args */
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
 #if TARGET_LONG_BITS == 64
@@ -870,21 +869,6 @@ static inline void tcg_out_ext16u(TCGContext *s, int cond,
     }
 }
 
-static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
-{
-    if (use_armv6_instructions) {
-        /* revsh */
-        tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
-    } else {
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
-        tcg_out_dat_reg(s, cond, ARITH_ORR,
-                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
-    }
-}
-
 static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
 {
     if (use_armv6_instructions) {
@@ -900,22 +884,6 @@ static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
     }
 }
 
-/* swap the two low bytes assuming that the two high input bytes and the
-   two high output bit can hold any value. */
-static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
-{
-    if (use_armv6_instructions) {
-        /* rev16 */
-        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
-    } else {
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
-        tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
-        tcg_out_dat_reg(s, cond, ARITH_ORR,
-                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
-    }
-}
-
 static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
 {
     if (use_armv6_instructions) {
@@ -1410,9 +1378,9 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
        icache usage.  For pre-armv6, use the signed helpers since we do
        not have a single insn sign-extend.  */
     if (use_armv6_instructions) {
-        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
+        func = qemu_ld_helpers[opc & MO_SIZE];
     } else {
-        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
+        func = qemu_ld_helpers[opc & MO_SSIZE];
         if (opc & MO_SIGN) {
             opc = MO_UL;
         }
@@ -1487,7 +1455,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
 
     /* Tail-call to the helper, which will return to the fast path.  */
-    tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+    tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]);
 }
 #endif /* SOFTMMU */
 
@@ -1495,8 +1463,6 @@ static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc,
                                          TCGReg datalo, TCGReg datahi,
                                          TCGReg addrlo, TCGReg addend)
 {
-    TCGMemOp bswap = opc & MO_BSWAP;
-
     switch (opc & MO_SSIZE) {
     case MO_UB:
         tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
@@ -1506,49 +1472,30 @@ static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc,
         break;
     case MO_UW:
         tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
-        if (bswap) {
-            tcg_out_bswap16(s, COND_AL, datalo, datalo);
-        }
         break;
     case MO_SW:
-        if (bswap) {
-            tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
-            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
-        } else {
-            tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
-        }
+        tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
         break;
     case MO_UL:
-    default:
         tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
-        if (bswap) {
-            tcg_out_bswap32(s, COND_AL, datalo, datalo);
-        }
         break;
     case MO_Q:
-        {
-            TCGReg dl = (bswap ? datahi : datalo);
-            TCGReg dh = (bswap ? datalo : datahi);
-
-            /* Avoid ldrd for user-only emulation, to handle unaligned.  */
-            if (USING_SOFTMMU && use_armv6_instructions
-                && (dl & 1) == 0 && dh == dl + 1) {
-                tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend);
-            } else if (dl != addend) {
-                tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo);
-                tcg_out_ld32_12(s, COND_AL, dh, addend, 4);
-            } else {
-                tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
-                                addend, addrlo, SHIFT_IMM_LSL(0));
-                tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0);
-                tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
-            }
-            if (bswap) {
-                tcg_out_bswap32(s, COND_AL, dl, dl);
-                tcg_out_bswap32(s, COND_AL, dh, dh);
-            }
+        /* Avoid ldrd for user-only emulation, to handle unaligned.  */
+        if (USING_SOFTMMU && use_armv6_instructions
+            && (datalo & 1) == 0 && datahi == datalo + 1) {
+            tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend);
+        } else if (datalo != addend) {
+            tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo);
+            tcg_out_ld32_12(s, COND_AL, datahi, addend, 4);
+        } else {
+            tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
+                            addend, addrlo, SHIFT_IMM_LSL(0));
+            tcg_out_ld32_12(s, COND_AL, datalo, TCG_REG_TMP, 0);
+            tcg_out_ld32_12(s, COND_AL, datahi, TCG_REG_TMP, 4);
         }
         break;
+    default:
+        g_assert_not_reached();
     }
 }
 
@@ -1556,8 +1503,6 @@ static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
                                           TCGReg datalo, TCGReg datahi,
                                           TCGReg addrlo)
 {
-    TCGMemOp bswap = opc & MO_BSWAP;
-
     switch (opc & MO_SSIZE) {
     case MO_UB:
         tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
@@ -1567,47 +1512,28 @@ static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
         break;
     case MO_UW:
         tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
-        if (bswap) {
-            tcg_out_bswap16(s, COND_AL, datalo, datalo);
-        }
         break;
     case MO_SW:
-        if (bswap) {
-            tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
-            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
-        } else {
-            tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
-        }
+        tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
         break;
     case MO_UL:
-    default:
         tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
-        if (bswap) {
-            tcg_out_bswap32(s, COND_AL, datalo, datalo);
-        }
         break;
     case MO_Q:
-        {
-            TCGReg dl = (bswap ? datahi : datalo);
-            TCGReg dh = (bswap ? datalo : datahi);
-
-            /* Avoid ldrd for user-only emulation, to handle unaligned.  */
-            if (USING_SOFTMMU && use_armv6_instructions
-                && (dl & 1) == 0 && dh == dl + 1) {
-                tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0);
-            } else if (dl == addrlo) {
-                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
-                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
-            } else {
-                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
-                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
-            }
-            if (bswap) {
-                tcg_out_bswap32(s, COND_AL, dl, dl);
-                tcg_out_bswap32(s, COND_AL, dh, dh);
-            }
+        /* Avoid ldrd for user-only emulation, to handle unaligned.  */
+        if (USING_SOFTMMU && use_armv6_instructions
+            && (datalo & 1) == 0 && datahi == datalo + 1) {
+            tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0);
+        } else if (datalo == addrlo) {
+            tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4);
+            tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
+        } else {
+            tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
+            tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4);
         }
         break;
+    default:
+        g_assert_not_reached();
     }
 }
 
@@ -1628,6 +1554,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
     addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
     oi = *args++;
     opc = get_memop(oi);
+    tcg_debug_assert(!(opc & MO_BSWAP));
 
 #ifdef CONFIG_SOFTMMU
     mem_index = get_mmuidx(oi);
@@ -1656,44 +1583,28 @@ static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc,
                                          TCGReg datalo, TCGReg datahi,
                                          TCGReg addrlo, TCGReg addend)
 {
-    TCGMemOp bswap = opc & MO_BSWAP;
-
     switch (opc & MO_SIZE) {
     case MO_8:
         tcg_out_st8_r(s, cond, datalo, addrlo, addend);
         break;
     case MO_16:
-        if (bswap) {
-            tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo);
-            tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend);
-        } else {
-            tcg_out_st16_r(s, cond, datalo, addrlo, addend);
-        }
+        tcg_out_st16_r(s, cond, datalo, addrlo, addend);
         break;
     case MO_32:
-    default:
-        if (bswap) {
-            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
-            tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend);
-        } else {
-            tcg_out_st32_r(s, cond, datalo, addrlo, addend);
-        }
+        tcg_out_st32_r(s, cond, datalo, addrlo, addend);
         break;
     case MO_64:
         /* Avoid strd for user-only emulation, to handle unaligned.  */
-        if (bswap) {
-            tcg_out_bswap32(s, cond, TCG_REG_R0, datahi);
-            tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo);
-            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
-            tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4);
-        } else if (USING_SOFTMMU && use_armv6_instructions
-                   && (datalo & 1) == 0 && datahi == datalo + 1) {
+        if (USING_SOFTMMU && use_armv6_instructions
+            && (datalo & 1) == 0 && datahi == datalo + 1) {
             tcg_out_strd_r(s, cond, datalo, addrlo, addend);
         } else {
             tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
             tcg_out_st32_12(s, cond, datahi, addend, 4);
         }
         break;
+    default:
+        g_assert_not_reached();
     }
 }
 
@@ -1701,44 +1612,28 @@ static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc,
                                           TCGReg datalo, TCGReg datahi,
                                           TCGReg addrlo)
 {
-    TCGMemOp bswap = opc & MO_BSWAP;
-
     switch (opc & MO_SIZE) {
     case MO_8:
         tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
         break;
     case MO_16:
-        if (bswap) {
-            tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo);
-            tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0);
-        } else {
-            tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
-        }
+        tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
         break;
     case MO_32:
-    default:
-        if (bswap) {
-            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
-            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
-        } else {
-            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
-        }
+        tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
         break;
     case MO_64:
         /* Avoid strd for user-only emulation, to handle unaligned.  */
-        if (bswap) {
-            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi);
-            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
-            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
-            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4);
-        } else if (USING_SOFTMMU && use_armv6_instructions
-                   && (datalo & 1) == 0 && datahi == datalo + 1) {
+        if (USING_SOFTMMU && use_armv6_instructions
+            && (datalo & 1) == 0 && datahi == datalo + 1) {
             tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
         } else {
             tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
             tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
         }
         break;
+    default:
+        g_assert_not_reached();
     }
 }
 
@@ -1759,6 +1654,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
     addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
     oi = *args++;
     opc = get_memop(oi);
+    tcg_debug_assert(!(opc & MO_BSWAP));
 
 #ifdef CONFIG_SOFTMMU
     mem_index = get_mmuidx(oi);
-- 
2.17.2


* Re: [Qemu-devel] [PATCH 0/5] tcg: Make bswap support in qemu_ld/st optional
  2018-11-20 12:15 [Qemu-devel] [PATCH 0/5] tcg: Make bswap support in qemu_ld/st optional Richard Henderson
                   ` (4 preceding siblings ...)
  2018-11-20 12:15 ` [Qemu-devel] [PATCH 5/5] tcg/arm: " Richard Henderson
@ 2018-11-22  1:11 ` no-reply
  5 siblings, 0 replies; 7+ messages in thread
From: no-reply @ 2018-11-22  1:11 UTC (permalink / raw)
  To: richard.henderson; +Cc: famz, qemu-devel, Alistair.Francis

Hi,

This series seems to have some coding style problems. See output below for
more information:

Message-id: 20181120121558.7660-1-richard.henderson@linaro.org
Type: series
Subject: [Qemu-devel] [PATCH 0/5] tcg: Make bswap support in qemu_ld/st optional

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
    echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
    if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
        failed=1
        echo
    fi
    n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
6336156 tcg/arm: Set TCG_TARGET_HAS_MEMORY_BSWAP to false
8b6c77b tcg/aarch64: Set TCG_TARGET_HAS_MEMORY_BSWAP to false
97936ce tcg/i386: Set TCG_TARGET_HAS_MEMORY_BSWAP with have_movbe
b6d6faf tcg/optimize: Optimize bswap
6a671c7 tcg: Add TCG_TARGET_HAS_MEMORY_BSWAP

=== OUTPUT BEGIN ===
Checking PATCH 1/5: tcg: Add TCG_TARGET_HAS_MEMORY_BSWAP...
Checking PATCH 2/5: tcg/optimize: Optimize bswap...
ERROR: spaces required around that ':' (ctx:VxE)
#21: FILE: tcg/optimize.c:356:
+    CASE_OP_32_64(bswap16):
                           ^

ERROR: spaces required around that ':' (ctx:VxE)
#24: FILE: tcg/optimize.c:359:
+    CASE_OP_32_64(bswap32):
                           ^

ERROR: spaces required around that ':' (ctx:VxE)
#37: FILE: tcg/optimize.c:1117:
+        CASE_OP_32_64(bswap16):
                               ^

ERROR: spaces required around that ':' (ctx:VxE)
#38: FILE: tcg/optimize.c:1118:
+        CASE_OP_32_64(bswap32):
                               ^

total: 4 errors, 0 warnings, 24 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 3/5: tcg/i386: Set TCG_TARGET_HAS_MEMORY_BSWAP with have_movbe...
Checking PATCH 4/5: tcg/aarch64: Set TCG_TARGET_HAS_MEMORY_BSWAP to false...
Checking PATCH 5/5: tcg/arm: Set TCG_TARGET_HAS_MEMORY_BSWAP to false...
=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-devel@redhat.com
