All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/9] tcg/s390 improvements
@ 2014-05-03 14:08 Richard Henderson
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 1/9] tcg-s390: Convert to TCGMemOp Richard Henderson
                   ` (8 more replies)
  0 siblings, 9 replies; 11+ messages in thread
From: Richard Henderson @ 2014-05-03 14:08 UTC (permalink / raw)
  To: qemu-devel

Most of these relate to converting to the "new" ldst opcodes.
(Two unconverted backends remaining...)

In addition, some other minor improvements to the generated code.
Tested on a z10 system, where the LOAD ON CONDITION insn is supported.


r~


Richard Henderson (9):
  tcg-s390: Convert to TCGMemOp
  tcg-s390: Integrate endianness into TCGMemOp
  tcg-s390: Convert to new ldst opcodes
  tcg-s390: Move ldst helpers out of line
  tcg-s390: Use more risbg in the tlb sequence
  tcg-s390: Implement tcg_register_jit
  tcg-s390: Allow immediate operands to add2 and sub2
  tcg-s390: Improve setcond
  tcg-s390: Don't force -march=z990

 configure             |   4 +-
 tcg/s390/tcg-target.c | 748 +++++++++++++++++++++++++++++---------------------
 tcg/s390/tcg-target.h |   2 +-
 3 files changed, 443 insertions(+), 311 deletions(-)

-- 
1.9.0

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Qemu-devel] [PATCH 1/9] tcg-s390: Convert to TCGMemOp
  2014-05-03 14:08 [Qemu-devel] [PATCH 0/9] tcg/s390 improvements Richard Henderson
@ 2014-05-03 14:08 ` Richard Henderson
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 2/9] tcg-s390: Integrate endianness into TCGMemOp Richard Henderson
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2014-05-03 14:08 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/s390/tcg-target.c | 82 ++++++++++++++++++++++-----------------------------
 1 file changed, 36 insertions(+), 46 deletions(-)

diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index ebdd074..131eec3 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -227,16 +227,6 @@ typedef enum S390Opcode {
     RX_STH      = 0x40,
 } S390Opcode;
 
-#define LD_SIGNED      0x04
-#define LD_UINT8       0x00
-#define LD_INT8        (LD_UINT8 | LD_SIGNED)
-#define LD_UINT16      0x01
-#define LD_INT16       (LD_UINT16 | LD_SIGNED)
-#define LD_UINT32      0x02
-#define LD_INT32       (LD_UINT32 | LD_SIGNED)
-#define LD_UINT64      0x03
-#define LD_INT64       (LD_UINT64 | LD_SIGNED)
-
 #ifndef NDEBUG
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
     "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
@@ -1280,7 +1270,7 @@ static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
     }
 }
 
-static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data,
+static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc, TCGReg data,
                                    TCGReg base, TCGReg index, int disp)
 {
 #ifdef TARGET_WORDS_BIGENDIAN
@@ -1289,13 +1279,13 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data,
     const int bswap = 1;
 #endif
     switch (opc) {
-    case LD_UINT8:
+    case MO_UB:
         tcg_out_insn(s, RXY, LLGC, data, base, index, disp);
         break;
-    case LD_INT8:
+    case MO_SB:
         tcg_out_insn(s, RXY, LGB, data, base, index, disp);
         break;
-    case LD_UINT16:
+    case MO_UW:
         if (bswap) {
             /* swapped unsigned halfword load with upper bits zeroed */
             tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
@@ -1304,7 +1294,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data,
             tcg_out_insn(s, RXY, LLGH, data, base, index, disp);
         }
         break;
-    case LD_INT16:
+    case MO_SW:
         if (bswap) {
             /* swapped sign-extended halfword load */
             tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
@@ -1313,7 +1303,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data,
             tcg_out_insn(s, RXY, LGH, data, base, index, disp);
         }
         break;
-    case LD_UINT32:
+    case MO_UL:
         if (bswap) {
             /* swapped unsigned int load with upper bits zeroed */
             tcg_out_insn(s, RXY, LRV, data, base, index, disp);
@@ -1322,7 +1312,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data,
             tcg_out_insn(s, RXY, LLGF, data, base, index, disp);
         }
         break;
-    case LD_INT32:
+    case MO_SL:
         if (bswap) {
             /* swapped sign-extended int load */
             tcg_out_insn(s, RXY, LRV, data, base, index, disp);
@@ -1331,7 +1321,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data,
             tcg_out_insn(s, RXY, LGF, data, base, index, disp);
         }
         break;
-    case LD_UINT64:
+    case MO_Q:
         if (bswap) {
             tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
         } else {
@@ -1343,7 +1333,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data,
     }
 }
 
-static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data,
+static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, TCGReg data,
                                    TCGReg base, TCGReg index, int disp)
 {
 #ifdef TARGET_WORDS_BIGENDIAN
@@ -1352,14 +1342,14 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data,
     const int bswap = 1;
 #endif
     switch (opc) {
-    case LD_UINT8:
+    case MO_UB:
         if (disp >= 0 && disp < 0x1000) {
             tcg_out_insn(s, RX, STC, data, base, index, disp);
         } else {
             tcg_out_insn(s, RXY, STCY, data, base, index, disp);
         }
         break;
-    case LD_UINT16:
+    case MO_UW:
         if (bswap) {
             tcg_out_insn(s, RXY, STRVH, data, base, index, disp);
         } else if (disp >= 0 && disp < 0x1000) {
@@ -1368,7 +1358,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data,
             tcg_out_insn(s, RXY, STHY, data, base, index, disp);
         }
         break;
-    case LD_UINT32:
+    case MO_UL:
         if (bswap) {
             tcg_out_insn(s, RXY, STRV, data, base, index, disp);
         } else if (disp >= 0 && disp < 0x1000) {
@@ -1377,7 +1367,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data,
             tcg_out_insn(s, RXY, STY, data, base, index, disp);
         }
         break;
-    case LD_UINT64:
+    case MO_Q:
         if (bswap) {
             tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
         } else {
@@ -1398,7 +1388,7 @@ static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
     const TCGReg arg1 = tcg_target_call_iarg_regs[1];
     const TCGReg arg2 = tcg_target_call_iarg_regs[2];
     const TCGReg arg3 = tcg_target_call_iarg_regs[3];
-    int s_bits = opc & 3;
+    TCGMemOp s_bits = opc & MO_SIZE;
     tcg_insn_unit *label1_ptr;
     tcg_target_long ofs;
 
@@ -1442,17 +1432,17 @@ static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
     if (is_store) {
         /* Make sure to zero-extend the value to the full register
            for the calling convention.  */
-        switch (opc) {
-        case LD_UINT8:
+        switch (s_bits) {
+        case MO_UB:
             tgen_ext8u(s, TCG_TYPE_I64, arg2, data_reg);
             break;
-        case LD_UINT16:
+        case MO_UW:
             tgen_ext16u(s, TCG_TYPE_I64, arg2, data_reg);
             break;
-        case LD_UINT32:
+        case MO_UL:
             tgen_ext32u(s, arg2, data_reg);
             break;
-        case LD_UINT64:
+        case MO_Q:
             tcg_out_mov(s, TCG_TYPE_I64, arg2, data_reg);
             break;
         default:
@@ -1468,13 +1458,13 @@ static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
 
         /* sign extension */
         switch (opc) {
-        case LD_INT8:
+        case MO_SB:
             tgen_ext8s(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
             break;
-        case LD_INT16:
+        case MO_SW:
             tgen_ext16s(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
             break;
-        case LD_INT32:
+        case MO_SL:
             tgen_ext32s(s, data_reg, TCG_REG_R2);
             break;
         default:
@@ -1525,7 +1515,7 @@ static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
 
 /* load data with address translation (if applicable)
    and endianness conversion */
-static void tcg_out_qemu_ld(TCGContext* s, const TCGArg* args, int opc)
+static void tcg_out_qemu_ld(TCGContext* s, const TCGArg* args, TCGMemOp opc)
 {
     TCGReg addr_reg, data_reg;
 #if defined(CONFIG_SOFTMMU)
@@ -1554,7 +1544,7 @@ static void tcg_out_qemu_ld(TCGContext* s, const TCGArg* args, int opc)
 #endif
 }
 
-static void tcg_out_qemu_st(TCGContext* s, const TCGArg* args, int opc)
+static void tcg_out_qemu_st(TCGContext* s, const TCGArg* args, TCGMemOp opc)
 {
     TCGReg addr_reg, data_reg;
 #if defined(CONFIG_SOFTMMU)
@@ -1812,36 +1802,36 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_qemu_ld8u:
-        tcg_out_qemu_ld(s, args, LD_UINT8);
+        tcg_out_qemu_ld(s, args, MO_UB);
         break;
     case INDEX_op_qemu_ld8s:
-        tcg_out_qemu_ld(s, args, LD_INT8);
+        tcg_out_qemu_ld(s, args, MO_SB);
         break;
     case INDEX_op_qemu_ld16u:
-        tcg_out_qemu_ld(s, args, LD_UINT16);
+        tcg_out_qemu_ld(s, args, MO_UW);
         break;
     case INDEX_op_qemu_ld16s:
-        tcg_out_qemu_ld(s, args, LD_INT16);
+        tcg_out_qemu_ld(s, args, MO_SW);
         break;
     case INDEX_op_qemu_ld32:
         /* ??? Technically we can use a non-extending instruction.  */
-        tcg_out_qemu_ld(s, args, LD_UINT32);
+        tcg_out_qemu_ld(s, args, MO_UL);
         break;
     case INDEX_op_qemu_ld64:
-        tcg_out_qemu_ld(s, args, LD_UINT64);
+        tcg_out_qemu_ld(s, args, MO_Q);
         break;
 
     case INDEX_op_qemu_st8:
-        tcg_out_qemu_st(s, args, LD_UINT8);
+        tcg_out_qemu_st(s, args, MO_UB);
         break;
     case INDEX_op_qemu_st16:
-        tcg_out_qemu_st(s, args, LD_UINT16);
+        tcg_out_qemu_st(s, args, MO_UW);
         break;
     case INDEX_op_qemu_st32:
-        tcg_out_qemu_st(s, args, LD_UINT32);
+        tcg_out_qemu_st(s, args, MO_UL);
         break;
     case INDEX_op_qemu_st64:
-        tcg_out_qemu_st(s, args, LD_UINT64);
+        tcg_out_qemu_st(s, args, MO_Q);
         break;
 
     case INDEX_op_ld16s_i64:
@@ -2038,10 +2028,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_qemu_ld32u:
-        tcg_out_qemu_ld(s, args, LD_UINT32);
+        tcg_out_qemu_ld(s, args, MO_UL);
         break;
     case INDEX_op_qemu_ld32s:
-        tcg_out_qemu_ld(s, args, LD_INT32);
+        tcg_out_qemu_ld(s, args, MO_SL);
         break;
 
     OP_32_64(deposit):
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Qemu-devel] [PATCH 2/9] tcg-s390: Integrate endianness into TCGMemOp
  2014-05-03 14:08 [Qemu-devel] [PATCH 0/9] tcg/s390 improvements Richard Henderson
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 1/9] tcg-s390: Convert to TCGMemOp Richard Henderson
@ 2014-05-03 14:08 ` Richard Henderson
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 3/9] tcg-s390: Convert to new ldst opcodes Richard Henderson
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2014-05-03 14:08 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/s390/tcg-target.c | 124 ++++++++++++++++++++++++--------------------------
 1 file changed, 60 insertions(+), 64 deletions(-)

diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 131eec3..c6d86f5 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -1273,11 +1273,6 @@ static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc, TCGReg data,
                                    TCGReg base, TCGReg index, int disp)
 {
-#ifdef TARGET_WORDS_BIGENDIAN
-    const int bswap = 0;
-#else
-    const int bswap = 1;
-#endif
     switch (opc) {
     case MO_UB:
         tcg_out_insn(s, RXY, LLGC, data, base, index, disp);
@@ -1285,49 +1280,50 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc, TCGReg data,
     case MO_SB:
         tcg_out_insn(s, RXY, LGB, data, base, index, disp);
         break;
+
+    case MO_UW | MO_BSWAP:
+        /* swapped unsigned halfword load with upper bits zeroed */
+        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
+        tgen_ext16u(s, TCG_TYPE_I64, data, data);
+        break;
     case MO_UW:
-        if (bswap) {
-            /* swapped unsigned halfword load with upper bits zeroed */
-            tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
-            tgen_ext16u(s, TCG_TYPE_I64, data, data);
-        } else {
-            tcg_out_insn(s, RXY, LLGH, data, base, index, disp);
-        }
+        tcg_out_insn(s, RXY, LLGH, data, base, index, disp);
+        break;
+
+    case MO_SW | MO_BSWAP:
+        /* swapped sign-extended halfword load */
+        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
+        tgen_ext16s(s, TCG_TYPE_I64, data, data);
         break;
     case MO_SW:
-        if (bswap) {
-            /* swapped sign-extended halfword load */
-            tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
-            tgen_ext16s(s, TCG_TYPE_I64, data, data);
-        } else {
-            tcg_out_insn(s, RXY, LGH, data, base, index, disp);
-        }
+        tcg_out_insn(s, RXY, LGH, data, base, index, disp);
+        break;
+
+    case MO_UL | MO_BSWAP:
+        /* swapped unsigned int load with upper bits zeroed */
+        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
+        tgen_ext32u(s, data, data);
         break;
     case MO_UL:
-        if (bswap) {
-            /* swapped unsigned int load with upper bits zeroed */
-            tcg_out_insn(s, RXY, LRV, data, base, index, disp);
-            tgen_ext32u(s, data, data);
-        } else {
-            tcg_out_insn(s, RXY, LLGF, data, base, index, disp);
-        }
+        tcg_out_insn(s, RXY, LLGF, data, base, index, disp);
+        break;
+
+    case MO_SL | MO_BSWAP:
+        /* swapped sign-extended int load */
+        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
+        tgen_ext32s(s, data, data);
         break;
     case MO_SL:
-        if (bswap) {
-            /* swapped sign-extended int load */
-            tcg_out_insn(s, RXY, LRV, data, base, index, disp);
-            tgen_ext32s(s, data, data);
-        } else {
-            tcg_out_insn(s, RXY, LGF, data, base, index, disp);
-        }
+        tcg_out_insn(s, RXY, LGF, data, base, index, disp);
+        break;
+
+    case MO_Q | MO_BSWAP:
+        tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
         break;
     case MO_Q:
-        if (bswap) {
-            tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
-        } else {
-            tcg_out_insn(s, RXY, LG, data, base, index, disp);
-        }
+        tcg_out_insn(s, RXY, LG, data, base, index, disp);
         break;
+
     default:
         tcg_abort();
     }
@@ -1336,11 +1332,6 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc, TCGReg data,
 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, TCGReg data,
                                    TCGReg base, TCGReg index, int disp)
 {
-#ifdef TARGET_WORDS_BIGENDIAN
-    const int bswap = 0;
-#else
-    const int bswap = 1;
-#endif
     switch (opc) {
     case MO_UB:
         if (disp >= 0 && disp < 0x1000) {
@@ -1349,31 +1340,36 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, TCGReg data,
             tcg_out_insn(s, RXY, STCY, data, base, index, disp);
         }
         break;
+
+    case MO_UW | MO_BSWAP:
+        tcg_out_insn(s, RXY, STRVH, data, base, index, disp);
+        break;
     case MO_UW:
-        if (bswap) {
-            tcg_out_insn(s, RXY, STRVH, data, base, index, disp);
-        } else if (disp >= 0 && disp < 0x1000) {
+        if (disp >= 0 && disp < 0x1000) {
             tcg_out_insn(s, RX, STH, data, base, index, disp);
         } else {
             tcg_out_insn(s, RXY, STHY, data, base, index, disp);
         }
         break;
+
+    case MO_UL | MO_BSWAP:
+        tcg_out_insn(s, RXY, STRV, data, base, index, disp);
+        break;
     case MO_UL:
-        if (bswap) {
-            tcg_out_insn(s, RXY, STRV, data, base, index, disp);
-        } else if (disp >= 0 && disp < 0x1000) {
+        if (disp >= 0 && disp < 0x1000) {
             tcg_out_insn(s, RX, ST, data, base, index, disp);
         } else {
             tcg_out_insn(s, RXY, STY, data, base, index, disp);
         }
         break;
+
+    case MO_Q | MO_BSWAP:
+        tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
+        break;
     case MO_Q:
-        if (bswap) {
-            tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
-        } else {
-            tcg_out_insn(s, RXY, STG, data, base, index, disp);
-        }
+        tcg_out_insn(s, RXY, STG, data, base, index, disp);
         break;
+
     default:
         tcg_abort();
     }
@@ -1457,7 +1453,7 @@ static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
         tcg_out_call(s, qemu_ld_helpers[s_bits]);
 
         /* sign extension */
-        switch (opc) {
+        switch (opc & MO_SSIZE) {
         case MO_SB:
             tgen_ext8s(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
             break;
@@ -1808,30 +1804,30 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_qemu_ld(s, args, MO_SB);
         break;
     case INDEX_op_qemu_ld16u:
-        tcg_out_qemu_ld(s, args, MO_UW);
+        tcg_out_qemu_ld(s, args, MO_TEUW);
         break;
     case INDEX_op_qemu_ld16s:
-        tcg_out_qemu_ld(s, args, MO_SW);
+        tcg_out_qemu_ld(s, args, MO_TESW);
         break;
     case INDEX_op_qemu_ld32:
         /* ??? Technically we can use a non-extending instruction.  */
-        tcg_out_qemu_ld(s, args, MO_UL);
+        tcg_out_qemu_ld(s, args, MO_TEUL);
         break;
     case INDEX_op_qemu_ld64:
-        tcg_out_qemu_ld(s, args, MO_Q);
+        tcg_out_qemu_ld(s, args, MO_TEQ);
         break;
 
     case INDEX_op_qemu_st8:
         tcg_out_qemu_st(s, args, MO_UB);
         break;
     case INDEX_op_qemu_st16:
-        tcg_out_qemu_st(s, args, MO_UW);
+        tcg_out_qemu_st(s, args, MO_TEUW);
         break;
     case INDEX_op_qemu_st32:
-        tcg_out_qemu_st(s, args, MO_UL);
+        tcg_out_qemu_st(s, args, MO_TEUL);
         break;
     case INDEX_op_qemu_st64:
-        tcg_out_qemu_st(s, args, MO_Q);
+        tcg_out_qemu_st(s, args, MO_TEQ);
         break;
 
     case INDEX_op_ld16s_i64:
@@ -2028,10 +2024,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_qemu_ld32u:
-        tcg_out_qemu_ld(s, args, MO_UL);
+        tcg_out_qemu_ld(s, args, MO_TEUL);
         break;
     case INDEX_op_qemu_ld32s:
-        tcg_out_qemu_ld(s, args, MO_SL);
+        tcg_out_qemu_ld(s, args, MO_TESL);
         break;
 
     OP_32_64(deposit):
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Qemu-devel] [PATCH 3/9] tcg-s390: Convert to new ldst opcodes
  2014-05-03 14:08 [Qemu-devel] [PATCH 0/9] tcg/s390 improvements Richard Henderson
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 1/9] tcg-s390: Convert to TCGMemOp Richard Henderson
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 2/9] tcg-s390: Integrate endianness into TCGMemOp Richard Henderson
@ 2014-05-03 14:08 ` Richard Henderson
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 4/9] tcg-s390: Move ldst helpers out of line Richard Henderson
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2014-05-03 14:08 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/s390/tcg-target.c | 170 ++++++++++++++++----------------------------------
 tcg/s390/tcg-target.h |   2 +-
 2 files changed, 56 insertions(+), 116 deletions(-)

diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index c6d86f5..332822f 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -238,6 +238,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    call-saved registers.  Likewise prefer the call-clobbered registers
    in reverse order to maximize the chance of avoiding the arguments.  */
 static const int tcg_target_reg_alloc_order[] = {
+    /* Call saved registers.  */
     TCG_REG_R13,
     TCG_REG_R12,
     TCG_REG_R11,
@@ -246,9 +247,11 @@ static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_R8,
     TCG_REG_R7,
     TCG_REG_R6,
+    /* Call clobbered registers.  */
     TCG_REG_R14,
     TCG_REG_R0,
     TCG_REG_R1,
+    /* Argument registers, in reverse order of allocation.  */
     TCG_REG_R5,
     TCG_REG_R4,
     TCG_REG_R3,
@@ -308,22 +311,29 @@ static const uint8_t tcg_cond_to_ltr_cond[] = {
 };
 
 #ifdef CONFIG_SOFTMMU
-/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
-   int mmu_idx) */
-static void * const qemu_ld_helpers[4] = {
-    helper_ldb_mmu,
-    helper_ldw_mmu,
-    helper_ldl_mmu,
-    helper_ldq_mmu,
+static void * const qemu_ld_helpers[16] = {
+    [MO_UB]   = helper_ret_ldub_mmu,
+    [MO_SB]   = helper_ret_ldsb_mmu,
+    [MO_LEUW] = helper_le_lduw_mmu,
+    [MO_LESW] = helper_le_ldsw_mmu,
+    [MO_LEUL] = helper_le_ldul_mmu,
+    [MO_LESL] = helper_le_ldsl_mmu,
+    [MO_LEQ]  = helper_le_ldq_mmu,
+    [MO_BEUW] = helper_be_lduw_mmu,
+    [MO_BESW] = helper_be_ldsw_mmu,
+    [MO_BEUL] = helper_be_ldul_mmu,
+    [MO_BESL] = helper_be_ldsl_mmu,
+    [MO_BEQ]  = helper_be_ldq_mmu,
 };
 
-/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
-   uintxx_t val, int mmu_idx) */
-static void * const qemu_st_helpers[4] = {
-    helper_stb_mmu,
-    helper_stw_mmu,
-    helper_stl_mmu,
-    helper_stq_mmu,
+static void * const qemu_st_helpers[16] = {
+    [MO_UB]   = helper_ret_stb_mmu,
+    [MO_LEUW] = helper_le_stw_mmu,
+    [MO_LEUL] = helper_le_stl_mmu,
+    [MO_LEQ]  = helper_le_stq_mmu,
+    [MO_BEUW] = helper_be_stw_mmu,
+    [MO_BEUL] = helper_be_stl_mmu,
+    [MO_BEQ]  = helper_be_stq_mmu,
 };
 #endif
 
@@ -1384,6 +1394,7 @@ static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
     const TCGReg arg1 = tcg_target_call_iarg_regs[1];
     const TCGReg arg2 = tcg_target_call_iarg_regs[2];
     const TCGReg arg3 = tcg_target_call_iarg_regs[3];
+    const TCGReg arg4 = tcg_target_call_iarg_regs[4];
     TCGMemOp s_bits = opc & MO_SIZE;
     tcg_insn_unit *label1_ptr;
     tcg_target_long ofs;
@@ -1445,29 +1456,15 @@ static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
             tcg_abort();
         }
         tcg_out_movi(s, TCG_TYPE_I32, arg3, mem_index);
-        tcg_out_mov(s, TCG_TYPE_I64, arg0, TCG_AREG0);
-        tcg_out_call(s, qemu_st_helpers[s_bits]);
+        tcg_out_mov(s, TCG_TYPE_PTR, arg0, TCG_AREG0);
+        tcg_out_movi(s, TCG_TYPE_PTR, arg4, (uintptr_t)s->code_ptr);
+        tcg_out_call(s, qemu_st_helpers[opc]);
     } else {
         tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index);
-        tcg_out_mov(s, TCG_TYPE_I64, arg0, TCG_AREG0);
-        tcg_out_call(s, qemu_ld_helpers[s_bits]);
-
-        /* sign extension */
-        switch (opc & MO_SSIZE) {
-        case MO_SB:
-            tgen_ext8s(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
-            break;
-        case MO_SW:
-            tgen_ext16s(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
-            break;
-        case MO_SL:
-            tgen_ext32s(s, data_reg, TCG_REG_R2);
-            break;
-        default:
-            /* unsigned -> just copy */
-            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
-            break;
-        }
+        tcg_out_mov(s, TCG_TYPE_PTR, arg0, TCG_AREG0);
+        tcg_out_movi(s, TCG_TYPE_PTR, arg3, (uintptr_t)s->code_ptr);
+        tcg_out_call(s, qemu_ld_helpers[opc]);
+        tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
     }
 
     /* jump to label2 (end) */
@@ -1511,59 +1508,39 @@ static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
 
 /* load data with address translation (if applicable)
    and endianness conversion */
-static void tcg_out_qemu_ld(TCGContext* s, const TCGArg* args, TCGMemOp opc)
+static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
+                            TCGMemOp opc, int mem_index)
 {
-    TCGReg addr_reg, data_reg;
 #if defined(CONFIG_SOFTMMU)
-    int mem_index;
     tcg_insn_unit *label2_ptr;
-#else
-    TCGReg index_reg;
-    tcg_target_long disp;
-#endif
-
-    data_reg = *args++;
-    addr_reg = *args++;
-
-#if defined(CONFIG_SOFTMMU)
-    mem_index = *args;
 
     addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
                                      opc, &label2_ptr, 0);
-
     tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0);
-
     tcg_finish_qemu_ldst(s, label2_ptr);
 #else
+    TCGReg index_reg;
+    tcg_target_long disp;
+
     tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
     tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp);
 #endif
 }
 
-static void tcg_out_qemu_st(TCGContext* s, const TCGArg* args, TCGMemOp opc)
+static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
+                            TCGMemOp opc, int mem_index)
 {
-    TCGReg addr_reg, data_reg;
 #if defined(CONFIG_SOFTMMU)
-    int mem_index;
     tcg_insn_unit *label2_ptr;
-#else
-    TCGReg index_reg;
-    tcg_target_long disp;
-#endif
-
-    data_reg = *args++;
-    addr_reg = *args++;
-
-#if defined(CONFIG_SOFTMMU)
-    mem_index = *args;
 
     addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
                                      opc, &label2_ptr, 1);
-
     tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0);
-
     tcg_finish_qemu_ldst(s, label2_ptr);
 #else
+    TCGReg index_reg;
+    tcg_target_long disp;
+
     tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
     tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp);
 #endif
@@ -1797,37 +1774,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                      args[2], const_args[2], args[3]);
         break;
 
-    case INDEX_op_qemu_ld8u:
-        tcg_out_qemu_ld(s, args, MO_UB);
-        break;
-    case INDEX_op_qemu_ld8s:
-        tcg_out_qemu_ld(s, args, MO_SB);
-        break;
-    case INDEX_op_qemu_ld16u:
-        tcg_out_qemu_ld(s, args, MO_TEUW);
-        break;
-    case INDEX_op_qemu_ld16s:
-        tcg_out_qemu_ld(s, args, MO_TESW);
-        break;
-    case INDEX_op_qemu_ld32:
+    case INDEX_op_qemu_ld_i32:
         /* ??? Technically we can use a non-extending instruction.  */
-        tcg_out_qemu_ld(s, args, MO_TEUL);
-        break;
-    case INDEX_op_qemu_ld64:
-        tcg_out_qemu_ld(s, args, MO_TEQ);
-        break;
-
-    case INDEX_op_qemu_st8:
-        tcg_out_qemu_st(s, args, MO_UB);
-        break;
-    case INDEX_op_qemu_st16:
-        tcg_out_qemu_st(s, args, MO_TEUW);
-        break;
-    case INDEX_op_qemu_st32:
-        tcg_out_qemu_st(s, args, MO_TEUL);
+    case INDEX_op_qemu_ld_i64:
+        tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3]);
         break;
-    case INDEX_op_qemu_st64:
-        tcg_out_qemu_st(s, args, MO_TEQ);
+    case INDEX_op_qemu_st_i32:
+    case INDEX_op_qemu_st_i64:
+        tcg_out_qemu_st(s, args[0], args[1], args[2], args[3]);
         break;
 
     case INDEX_op_ld16s_i64:
@@ -2023,13 +1977,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                      args[2], const_args[2], args[3]);
         break;
 
-    case INDEX_op_qemu_ld32u:
-        tcg_out_qemu_ld(s, args, MO_TEUL);
-        break;
-    case INDEX_op_qemu_ld32s:
-        tcg_out_qemu_ld(s, args, MO_TESL);
-        break;
-
     OP_32_64(deposit):
         tgen_deposit(s, args[0], args[2], args[3], args[4]);
         break;
@@ -2094,17 +2041,10 @@ static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_movcond_i32, { "r", "r", "rC", "r", "0" } },
     { INDEX_op_deposit_i32, { "r", "0", "r" } },
 
-    { INDEX_op_qemu_ld8u, { "r", "L" } },
-    { INDEX_op_qemu_ld8s, { "r", "L" } },
-    { INDEX_op_qemu_ld16u, { "r", "L" } },
-    { INDEX_op_qemu_ld16s, { "r", "L" } },
-    { INDEX_op_qemu_ld32, { "r", "L" } },
-    { INDEX_op_qemu_ld64, { "r", "L" } },
-
-    { INDEX_op_qemu_st8, { "L", "L" } },
-    { INDEX_op_qemu_st16, { "L", "L" } },
-    { INDEX_op_qemu_st32, { "L", "L" } },
-    { INDEX_op_qemu_st64, { "L", "L" } },
+    { INDEX_op_qemu_ld_i32, { "r", "L" } },
+    { INDEX_op_qemu_ld_i64, { "r", "L" } },
+    { INDEX_op_qemu_st_i32, { "L", "L" } },
+    { INDEX_op_qemu_st_i64, { "L", "L" } },
 
     { INDEX_op_ld8u_i64, { "r", "r" } },
     { INDEX_op_ld8s_i64, { "r", "r" } },
@@ -2159,9 +2099,6 @@ static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_movcond_i64, { "r", "r", "rC", "r", "0" } },
     { INDEX_op_deposit_i64, { "r", "0", "r" } },
 
-    { INDEX_op_qemu_ld32u, { "r", "L" } },
-    { INDEX_op_qemu_ld32s, { "r", "L" } },
-
     { -1 },
 };
 
@@ -2196,6 +2133,9 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
+    /* The r6 register is technically call-saved, but it's also a parameter
+       register, so it can get killed by setup for the qemu_st helper.  */
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
     /* The return register can be considered call-clobbered.  */
     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
 
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 5bf733e..ad2c6dd 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -100,7 +100,7 @@ typedef enum TCGReg {
 #define TCG_TARGET_HAS_muluh_i64        0
 #define TCG_TARGET_HAS_mulsh_i64        0
 
-#define TCG_TARGET_HAS_new_ldst         0
+#define TCG_TARGET_HAS_new_ldst         1
 
 extern bool tcg_target_deposit_valid(int ofs, int len);
 #define TCG_TARGET_deposit_i32_valid  tcg_target_deposit_valid
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Qemu-devel] [PATCH 4/9] tcg-s390: Move ldst helpers out of line
  2014-05-03 14:08 [Qemu-devel] [PATCH 0/9] tcg/s390 improvements Richard Henderson
                   ` (2 preceding siblings ...)
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 3/9] tcg-s390: Convert to new ldst opcodes Richard Henderson
@ 2014-05-03 14:08 ` Richard Henderson
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 5/9] tcg-s390: Use more risbg in the tlb sequence Richard Henderson
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2014-05-03 14:08 UTC (permalink / raw)
  To: qemu-devel

That is, the old LDST_OPTIMIZATION.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/s390/tcg-target.c | 210 ++++++++++++++++++++++++++++----------------------
 1 file changed, 118 insertions(+), 92 deletions(-)

diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 332822f..6e6c74a 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -24,7 +24,7 @@
  * THE SOFTWARE.
  */
 
-#include "tcg-be-null.h"
+#include "tcg-be-ldst.h"
 
 /* We only support generating code for 64-bit mode.  */
 #if TCG_TARGET_REG_BITS != 64
@@ -1386,107 +1386,123 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, TCGReg data,
 }
 
 #if defined(CONFIG_SOFTMMU)
-static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
-                                    TCGReg addr_reg, int mem_index, int opc,
-                                    tcg_insn_unit **label2_ptr_p, int is_store)
+/* We're expecting to use a 20-bit signed offset on the tlb memory ops.
+   Using the offset of the second entry in the last tlb table ensures
+   that we can index all of the elements of the first entry.  */
+QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
+                  > 0x7ffff);
+
+/* Load and compare a TLB entry, leaving the flags set.  Loads the TLB
+   addend into R2.  Returns a register with the santitized guest address.  */
+static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc,
+                               int mem_index, bool is_ld)
 {
-    const TCGReg arg0 = tcg_target_call_iarg_regs[0];
-    const TCGReg arg1 = tcg_target_call_iarg_regs[1];
-    const TCGReg arg2 = tcg_target_call_iarg_regs[2];
-    const TCGReg arg3 = tcg_target_call_iarg_regs[3];
-    const TCGReg arg4 = tcg_target_call_iarg_regs[4];
     TCGMemOp s_bits = opc & MO_SIZE;
-    tcg_insn_unit *label1_ptr;
-    tcg_target_long ofs;
+    int ofs;
+
+    tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
+                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 
     if (TARGET_LONG_BITS == 32) {
-        tgen_ext32u(s, arg1, addr_reg);
+        tgen_ext32u(s, TCG_REG_R3, addr_reg);
     } else {
-        tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg);
+        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
     }
 
-    tcg_out_sh64(s, RSY_SRLG, arg2, addr_reg, TCG_REG_NONE,
-                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
-
-    tgen_andi(s, TCG_TYPE_I64, arg1, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-    tgen_andi(s, TCG_TYPE_I64, arg2, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+    tgen_andi(s, TCG_TYPE_I64, TCG_REG_R2,
+              (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+    tgen_andi(s, TCG_TYPE_I64, TCG_REG_R3,
+              TARGET_PAGE_MASK | ((1 << s_bits) - 1));
 
-    if (is_store) {
-        ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
-    } else {
+    if (is_ld) {
         ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
+    } else {
+        ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
     }
-    assert(ofs < 0x80000);
-
     if (TARGET_LONG_BITS == 32) {
-        tcg_out_mem(s, RX_C, RXY_CY, arg1, arg2, TCG_AREG0, ofs);
+        tcg_out_mem(s, RX_C, RXY_CY, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs);
     } else {
-        tcg_out_mem(s, 0, RXY_CG, arg1, arg2, TCG_AREG0, ofs);
+        tcg_out_mem(s, 0, RXY_CG, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs);
     }
 
+    ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
+    tcg_out_mem(s, 0, RXY_LG, TCG_REG_R2, TCG_REG_R2, TCG_AREG0, ofs);
+
     if (TARGET_LONG_BITS == 32) {
-        tgen_ext32u(s, arg1, addr_reg);
-    } else {
-        tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg);
-    }
-
-    label1_ptr = s->code_ptr;
-
-    /* je label1 (offset will be patched in later) */
-    tcg_out_insn(s, RI, BRC, S390_CC_EQ, 0);
-
-    /* call load/store helper */
-    if (is_store) {
-        /* Make sure to zero-extend the value to the full register
-           for the calling convention.  */
-        switch (s_bits) {
-        case MO_UB:
-            tgen_ext8u(s, TCG_TYPE_I64, arg2, data_reg);
-            break;
-        case MO_UW:
-            tgen_ext16u(s, TCG_TYPE_I64, arg2, data_reg);
-            break;
-        case MO_UL:
-            tgen_ext32u(s, arg2, data_reg);
-            break;
-        case MO_Q:
-            tcg_out_mov(s, TCG_TYPE_I64, arg2, data_reg);
-            break;
-        default:
-            tcg_abort();
-        }
-        tcg_out_movi(s, TCG_TYPE_I32, arg3, mem_index);
-        tcg_out_mov(s, TCG_TYPE_PTR, arg0, TCG_AREG0);
-        tcg_out_movi(s, TCG_TYPE_PTR, arg4, (uintptr_t)s->code_ptr);
-        tcg_out_call(s, qemu_st_helpers[opc]);
-    } else {
-        tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index);
-        tcg_out_mov(s, TCG_TYPE_PTR, arg0, TCG_AREG0);
-        tcg_out_movi(s, TCG_TYPE_PTR, arg3, (uintptr_t)s->code_ptr);
-        tcg_out_call(s, qemu_ld_helpers[opc]);
-        tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
+        tgen_ext32u(s, TCG_REG_R3, addr_reg);
+        return TCG_REG_R3;
     }
+    return addr_reg;
+}
 
-    /* jump to label2 (end) */
-    *label2_ptr_p = s->code_ptr;
-
-    tcg_out_insn(s, RI, BRC, S390_CC_ALWAYS, 0);
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
+                                TCGReg data, TCGReg addr, int mem_index,
+                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
+{
+    TCGLabelQemuLdst *label = new_ldst_label(s);
+
+    label->is_ld = is_ld;
+    label->opc = opc;
+    label->datalo_reg = data;
+    label->addrlo_reg = addr;
+    label->mem_index = mem_index;
+    label->raddr = raddr;
+    label->label_ptr[0] = label_ptr;
+}
 
-    /* this is label1, patch branch */
-    label1_ptr[1] = s->code_ptr - label1_ptr;
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+    TCGReg addr_reg = lb->addrlo_reg;
+    TCGReg data_reg = lb->datalo_reg;
+    TCGMemOp opc = lb->opc;
 
-    ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
-    assert(ofs < 0x80000);
+    patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2);
 
-    tcg_out_mem(s, 0, RXY_AG, arg1, arg2, TCG_AREG0, ofs);
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
+    if (TARGET_LONG_BITS == 64) {
+        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
+    }
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, lb->mem_index);
+    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
+    tcg_out_call(s, qemu_ld_helpers[opc]);
+    tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
 
-    return arg1;
+    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
 }
 
-static void tcg_finish_qemu_ldst(TCGContext* s, tcg_insn_unit *label2_ptr)
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-    /* patch branch */
-    label2_ptr[1] = s->code_ptr - label2_ptr;
+    TCGReg addr_reg = lb->addrlo_reg;
+    TCGReg data_reg = lb->datalo_reg;
+    TCGMemOp opc = lb->opc;
+
+    patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2);
+
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
+    if (TARGET_LONG_BITS == 64) {
+        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
+    }
+    switch (opc & MO_SIZE) {
+    case MO_UB:
+        tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
+        break;
+    case MO_UW:
+        tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
+        break;
+    case MO_UL:
+        tgen_ext32u(s, TCG_REG_R4, data_reg);
+        break;
+    case MO_Q:
+        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
+        break;
+    default:
+        tcg_abort();
+    }
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, lb->mem_index);
+    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
+    tcg_out_call(s, qemu_st_helpers[opc]);
+
+    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
 }
 #else
 static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
@@ -1506,18 +1522,22 @@ static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
 }
 #endif /* CONFIG_SOFTMMU */
 
-/* load data with address translation (if applicable)
-   and endianness conversion */
 static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
                             TCGMemOp opc, int mem_index)
 {
-#if defined(CONFIG_SOFTMMU)
-    tcg_insn_unit *label2_ptr;
+#ifdef CONFIG_SOFTMMU
+    tcg_insn_unit *label_ptr;
+    TCGReg base_reg;
+
+    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
+
+    label_ptr = s->code_ptr + 1;
+    tcg_out_insn(s, RI, BRC, S390_CC_NE, 0);
+
+    tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
 
-    addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
-                                     opc, &label2_ptr, 0);
-    tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0);
-    tcg_finish_qemu_ldst(s, label2_ptr);
+    add_qemu_ldst_label(s, 1, opc, data_reg, addr_reg, mem_index,
+                        s->code_ptr, label_ptr);
 #else
     TCGReg index_reg;
     tcg_target_long disp;
@@ -1530,13 +1550,19 @@ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
 static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
                             TCGMemOp opc, int mem_index)
 {
-#if defined(CONFIG_SOFTMMU)
-    tcg_insn_unit *label2_ptr;
+#ifdef CONFIG_SOFTMMU
+    tcg_insn_unit *label_ptr;
+    TCGReg base_reg;
+
+    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
+
+    label_ptr = s->code_ptr + 1;
+    tcg_out_insn(s, RI, BRC, S390_CC_NE, 0);
+
+    tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
 
-    addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
-                                     opc, &label2_ptr, 1);
-    tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0);
-    tcg_finish_qemu_ldst(s, label2_ptr);
+    add_qemu_ldst_label(s, 0, opc, data_reg, addr_reg, mem_index,
+                        s->code_ptr, label_ptr);
 #else
     TCGReg index_reg;
     tcg_target_long disp;
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Qemu-devel] [PATCH 5/9] tcg-s390: Use more risbg in the tlb sequence
  2014-05-03 14:08 [Qemu-devel] [PATCH 0/9] tcg/s390 improvements Richard Henderson
                   ` (3 preceding siblings ...)
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 4/9] tcg-s390: Move ldst helpers out of line Richard Henderson
@ 2014-05-03 14:08 ` Richard Henderson
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 6/9] tcg-s390: Implement tcg_register_jit Richard Henderson
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2014-05-03 14:08 UTC (permalink / raw)
  To: qemu-devel

Elides two insns from the sequence.  Also note the error in the
computation of the parameters for its use as and w/ wraparound.

The resulting tlb compare sequence is satisfyingly minimal:

	risbg  %r2,%r8,51,186,56
	risbg  %r3,%r8,61,178,0
	cg     %r3,904(%r10,%r2)
	lg     %r2,920(%r10,%r2)
	jlh    tlb_miss

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/s390/tcg-target.c | 49 ++++++++++++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 6e6c74a..cd60c01 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -933,6 +933,20 @@ static inline bool risbg_mask(uint64_t c)
     return c == -lsb;
 }
 
+static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val)
+{
+    int msb, lsb;
+    if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
+        /* Achieve wraparound by swapping msb and lsb.  */
+        msb = 64 - ctz64(~val);
+        lsb = clz64(~val) - 1;
+    } else {
+        msb = clz64(val);
+        lsb = 63 - ctz64(val);
+    }
+    tcg_out_risbg(s, out, in, msb, lsb, 0, 1);
+}
+
 static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
 {
     static const S390Opcode ni_insns[4] = {
@@ -980,16 +994,7 @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
         }
     }
     if ((facilities & FACILITY_GEN_INST_EXT) && risbg_mask(val)) {
-        int msb, lsb;
-        if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
-            /* Achieve wraparound by swapping msb and lsb.  */
-            msb = 63 - ctz64(~val);
-            lsb = clz64(~val) + 1;
-        } else {
-            msb = clz64(val);
-            lsb = 63 - ctz64(val);
-        }
-        tcg_out_risbg(s, dest, dest, msb, lsb, 0, 1);
+        tgen_andi_risbg(s, dest, dest, val);
         return;
     }
 
@@ -1398,22 +1403,24 @@ static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc,
                                int mem_index, bool is_ld)
 {
     TCGMemOp s_bits = opc & MO_SIZE;
+    uint64_t tlb_mask = TARGET_PAGE_MASK | ((1 << s_bits) - 1);
     int ofs;
 
-    tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
-                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
-
-    if (TARGET_LONG_BITS == 32) {
-        tgen_ext32u(s, TCG_REG_R3, addr_reg);
+    if (facilities & FACILITY_GEN_INST_EXT) {
+        tcg_out_risbg(s, TCG_REG_R2, addr_reg,
+                      64 - CPU_TLB_BITS - CPU_TLB_ENTRY_BITS,
+                      63 - CPU_TLB_ENTRY_BITS,
+                      64 + CPU_TLB_ENTRY_BITS - TARGET_PAGE_BITS, 1);
+        tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
     } else {
-        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
+        tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
+                     TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+        tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_R3, addr_reg);
+        tgen_andi(s, TCG_TYPE_I64, TCG_REG_R2,
+                  (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+        tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
     }
 
-    tgen_andi(s, TCG_TYPE_I64, TCG_REG_R2,
-              (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
-    tgen_andi(s, TCG_TYPE_I64, TCG_REG_R3,
-              TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-
     if (is_ld) {
         ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
     } else {
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Qemu-devel] [PATCH 6/9] tcg-s390: Implement tcg_register_jit
  2014-05-03 14:08 [Qemu-devel] [PATCH 0/9] tcg/s390 improvements Richard Henderson
                   ` (4 preceding siblings ...)
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 5/9] tcg-s390: Use more risbg in the tlb sequence Richard Henderson
@ 2014-05-03 14:08 ` Richard Henderson
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 7/9] tcg-s390: Allow immediate operands to add2 and sub2 Richard Henderson
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2014-05-03 14:08 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/s390/tcg-target.c | 62 +++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 55 insertions(+), 7 deletions(-)

diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index cd60c01..2f0cdf8 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -2181,18 +2181,17 @@ static void tcg_target_init(TCGContext *s)
     tcg_add_target_add_op_defs(s390_op_defs);
 }
 
+#define FRAME_SIZE  ((int)(TCG_TARGET_CALL_STACK_OFFSET          \
+                           + TCG_STATIC_CALL_ARGS_SIZE           \
+                           + CPU_TEMP_BUF_NLONGS * sizeof(long)))
+
 static void tcg_target_qemu_prologue(TCGContext *s)
 {
-    tcg_target_long frame_size;
-
     /* stmg %r6,%r15,48(%r15) (save registers) */
     tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
 
     /* aghi %r15,-frame_size */
-    frame_size = TCG_TARGET_CALL_STACK_OFFSET;
-    frame_size += TCG_STATIC_CALL_ARGS_SIZE;
-    frame_size += CPU_TEMP_BUF_NLONGS * sizeof(long);
-    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -frame_size);
+    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE);
 
     tcg_set_frame(s, TCG_REG_CALL_STACK,
                   TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
@@ -2211,8 +2210,57 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 
     /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
     tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
-                 frame_size + 48);
+                 FRAME_SIZE + 48);
 
     /* br %r14 (return) */
     tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
 }
+
+typedef struct {
+    DebugFrameCIE cie;
+    DebugFrameFDEHeader fde;
+    uint8_t fde_def_cfa[4];
+    uint8_t fde_reg_ofs[18];
+} DebugFrame;
+
+/* We're expecting a 2 byte uleb128 encoded value.  */
+QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
+
+#define ELF_HOST_MACHINE  EM_S390
+
+static DebugFrame debug_frame = {
+    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+    .cie.id = -1,
+    .cie.version = 1,
+    .cie.code_align = 1,
+    .cie.data_align = 8,                /* sleb128 8 */
+    .cie.return_column = TCG_REG_R14,
+
+    /* Total FDE size does not include the "len" member.  */
+    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
+
+    .fde_def_cfa = {
+        12, TCG_REG_CALL_STACK,         /* DW_CFA_def_cfa %r15, ... */
+        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
+        (FRAME_SIZE >> 7)
+    },
+    .fde_reg_ofs = {
+        0x86, 6,                        /* DW_CFA_offset, %r6, 48 */
+        0x87, 7,                        /* DW_CFA_offset, %r7, 56 */
+        0x88, 8,                        /* DW_CFA_offset, %r8, 64 */
+        0x89, 9,                        /* DW_CFA_offset, %r92, 72 */
+        0x8a, 10,                       /* DW_CFA_offset, %r10, 80 */
+        0x8b, 11,                       /* DW_CFA_offset, %r11, 88 */
+        0x8c, 12,                       /* DW_CFA_offset, %r12, 96 */
+        0x8d, 13,                       /* DW_CFA_offset, %r13, 104 */
+        0x8e, 14,                       /* DW_CFA_offset, %r14, 112 */
+    }
+};
+
+void tcg_register_jit(void *buf, size_t buf_size)
+{
+    debug_frame.fde.func_start = (uintptr_t)buf;
+    debug_frame.fde.func_len = buf_size;
+
+    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Qemu-devel] [PATCH 7/9] tcg-s390: Allow immediate operands to add2 and sub2
  2014-05-03 14:08 [Qemu-devel] [PATCH 0/9] tcg/s390 improvements Richard Henderson
                   ` (5 preceding siblings ...)
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 6/9] tcg-s390: Implement tcg_register_jit Richard Henderson
@ 2014-05-03 14:08 ` Richard Henderson
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 8/9] tcg-s390: Improve setcond Richard Henderson
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 9/9] tcg-s390: Don't force -march=z990 Richard Henderson
  8 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2014-05-03 14:08 UTC (permalink / raw)
  To: qemu-devel

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/s390/tcg-target.c | 64 +++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 52 insertions(+), 12 deletions(-)

diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 2f0cdf8..54d6dc1 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -42,6 +42,7 @@
 #define TCG_CT_CONST_ORI   0x200
 #define TCG_CT_CONST_XORI  0x400
 #define TCG_CT_CONST_CMPI  0x800
+#define TCG_CT_CONST_ADLI  0x1000
 
 /* Several places within the instruction set 0 means "no register"
    rather than TCG_REG_R0.  */
@@ -403,6 +404,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         tcg_regset_clear(ct->u.regs);
         tcg_regset_set_reg(ct->u.regs, TCG_REG_R3);
         break;
+    case 'A':
+        ct->ct |= TCG_CT_CONST_ADLI;
+        break;
     case 'K':
         ct->ct |= TCG_CT_CONST_MULI;
         break;
@@ -507,6 +511,20 @@ static int tcg_match_cmpi(TCGType type, tcg_target_long val)
     }
 }
 
+/* Immediates to be used with add2/sub2.  */
+
+static int tcg_match_add2i(TCGType type, tcg_target_long val)
+{
+    if (facilities & FACILITY_EXT_IMM) {
+        if (type == TCG_TYPE_I32) {
+            return 1;
+        } else if (val >= -0xffffffffll && val <= 0xffffffffll) {
+            return 1;
+        }
+    }
+    return 0;
+}
+
 /* Test if a constant matches the constraint. */
 static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                   const TCGArgConstraint *arg_ct)
@@ -532,6 +550,8 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
         } else {
             return val == (int16_t)val;
         }
+    } else if (ct & TCG_CT_CONST_ADLI) {
+        return tcg_match_add2i(type, val);
     } else if (ct & TCG_CT_CONST_ORI) {
         return tcg_match_ori(type, val);
     } else if (ct & TCG_CT_CONST_XORI) {
@@ -1780,13 +1800,19 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_add2_i32:
-        /* ??? Make use of ALFI.  */
-        tcg_out_insn(s, RR, ALR, args[0], args[4]);
+        if (const_args[4]) {
+            tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
+        } else {
+            tcg_out_insn(s, RR, ALR, args[0], args[4]);
+        }
         tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
         break;
     case INDEX_op_sub2_i32:
-        /* ??? Make use of SLFI.  */
-        tcg_out_insn(s, RR, SLR, args[0], args[4]);
+        if (const_args[4]) {
+            tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
+        } else {
+            tcg_out_insn(s, RR, SLR, args[0], args[4]);
+        }
         tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
         break;
 
@@ -1987,13 +2013,27 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_add2_i64:
-        /* ??? Make use of ALGFI and SLGFI.  */
-        tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
+        if (const_args[4]) {
+            if ((int64_t)args[4] >= 0) {
+                tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
+            } else {
+                tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
+            }
+        } else {
+            tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
+        }
         tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
         break;
     case INDEX_op_sub2_i64:
-        /* ??? Make use of ALGFI and SLGFI.  */
-        tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
+        if (const_args[4]) {
+            if ((int64_t)args[4] >= 0) {
+                tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
+            } else {
+                tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
+            }
+        } else {
+            tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
+        }
         tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
         break;
 
@@ -2066,8 +2106,8 @@ static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_bswap16_i32, { "r", "r" } },
     { INDEX_op_bswap32_i32, { "r", "r" } },
 
-    { INDEX_op_add2_i32, { "r", "r", "0", "1", "r", "r" } },
-    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "r", "r" } },
+    { INDEX_op_add2_i32, { "r", "r", "0", "1", "rA", "r" } },
+    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "rA", "r" } },
 
     { INDEX_op_brcond_i32, { "r", "rC" } },
     { INDEX_op_setcond_i32, { "r", "r", "rC" } },
@@ -2124,8 +2164,8 @@ static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_bswap32_i64, { "r", "r" } },
     { INDEX_op_bswap64_i64, { "r", "r" } },
 
-    { INDEX_op_add2_i64, { "r", "r", "0", "1", "r", "r" } },
-    { INDEX_op_sub2_i64, { "r", "r", "0", "1", "r", "r" } },
+    { INDEX_op_add2_i64, { "r", "r", "0", "1", "rA", "r" } },
+    { INDEX_op_sub2_i64, { "r", "r", "0", "1", "rA", "r" } },
 
     { INDEX_op_brcond_i64, { "r", "rC" } },
     { INDEX_op_setcond_i64, { "r", "r", "rC" } },
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Qemu-devel] [PATCH 8/9] tcg-s390: Improve setcond
  2014-05-03 14:08 [Qemu-devel] [PATCH 0/9] tcg/s390 improvements Richard Henderson
                   ` (6 preceding siblings ...)
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 7/9] tcg-s390: Allow immediate operands to add2 and sub2 Richard Henderson
@ 2014-05-03 14:08 ` Richard Henderson
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 9/9] tcg-s390: Don't force -march=z990 Richard Henderson
  8 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2014-05-03 14:08 UTC (permalink / raw)
  To: qemu-devel

There are a variety of common cases for which we can use carry tricks to
avoid a conditional branch.  On very new hardware, use LOAD ON CONDITION
instead of a conditional branch.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/s390/tcg-target.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 91 insertions(+), 6 deletions(-)

diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 54d6dc1..132f71a 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -1135,15 +1135,100 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
     return tcg_cond_to_s390_cond[c];
 }
 
-static void tgen_setcond(TCGContext *s, TCGType type, TCGCond c,
+static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
                          TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
 {
-    int cc = tgen_cmp(s, type, c, c1, c2, c2const);
+    int cc;
+
+    switch (cond) {
+    case TCG_COND_GTU:
+    case TCG_COND_GT:
+    do_greater:
+        /* The result of a compare has CC=2 for GT and CC=3 unused.
+           ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit.  */
+        tgen_cmp(s, type, cond, c1, c2, c2const);
+        tcg_out_movi(s, type, dest, 0);
+        tcg_out_insn(s, RRE, ALCGR, dest, dest);
+        return;
+
+    case TCG_COND_GEU:
+    do_geu:
+        /* We need "real" carry semantics, so use SUBTRACT LOGICAL
+           instead of COMPARE LOGICAL.  This needs an extra move.  */
+        tcg_out_mov(s, type, TCG_TMP0, c1);
+        if (c2const) {
+            tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
+            if (type == TCG_TYPE_I32) {
+                tcg_out_insn(s, RIL, SLFI, TCG_TMP0, c2);
+            } else {
+                tcg_out_insn(s, RIL, SLGFI, TCG_TMP0, c2);
+            }
+        } else {
+            if (type == TCG_TYPE_I32) {
+                tcg_out_insn(s, RR, SLR, TCG_TMP0, c2);
+            } else {
+                tcg_out_insn(s, RRE, SLGR, TCG_TMP0, c2);
+            }
+            tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
+        }
+        tcg_out_insn(s, RRE, ALCGR, dest, dest);
+        return;
+
+    case TCG_COND_LEU:
+    case TCG_COND_LTU:
+    case TCG_COND_LT:
+        /* Swap operands so that we can use GEU/GTU/GT.  */
+        if (c2const) {
+            tcg_out_movi(s, type, TCG_TMP0, c2);
+            c2 = c1;
+            c2const = 0;
+            c1 = TCG_TMP0;
+        } else {
+            TCGReg t = c1;
+            c1 = c2;
+            c2 = t;
+        }
+        if (cond == TCG_COND_LEU) {
+            goto do_geu;
+        }
+        cond = tcg_swap_cond(cond);
+        goto do_greater;
+
+    case TCG_COND_NE:
+        /* X != 0 is X > 0.  */
+        if (c2const && c2 == 0) {
+            cond = TCG_COND_GTU;
+            goto do_greater;
+        }
+        break;
 
-    /* Emit: r1 = 1; if (cc) goto over; r1 = 0; over:  */
-    tcg_out_movi(s, type, dest, 1);
-    tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
-    tcg_out_movi(s, type, dest, 0);
+    case TCG_COND_EQ:
+        /* X == 0 is X <= 0 is 0 >= X.  */
+        if (c2const && c2 == 0) {
+            tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0);
+            c2 = c1;
+            c2const = 0;
+            c1 = TCG_TMP0;
+            goto do_geu;
+        }
+        break;
+
+    default:
+        break;
+    }
+
+    cc = tgen_cmp(s, type, cond, c1, c2, c2const);
+    if (facilities & FACILITY_LOAD_ON_COND) {
+        /* Emit: d = 0, t = 1, d = (cc ? t : d).  */
+        tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
+        tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc);
+    } else {
+        /* Emit: d = 1; if (cc) goto over; d = 0; over:  */
+        tcg_out_movi(s, type, dest, 1);
+        tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
+        tcg_out_movi(s, type, dest, 0);
+    }
 }
 
 static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Qemu-devel] [PATCH 9/9] tcg-s390: Don't force -march=z990
  2014-05-03 14:08 [Qemu-devel] [PATCH 0/9] tcg/s390 improvements Richard Henderson
                   ` (7 preceding siblings ...)
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 8/9] tcg-s390: Improve setcond Richard Henderson
@ 2014-05-03 14:08 ` Richard Henderson
  2014-05-05  7:42   ` Christian Borntraeger
  8 siblings, 1 reply; 11+ messages in thread
From: Richard Henderson @ 2014-05-03 14:08 UTC (permalink / raw)
  To: qemu-devel

While we still require the LONG DISPLACEMENT facility, defaults
have moved on since then.  Don't override the system compiler,
whose default may be set to z9-109 or later.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 configure | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index 2fbec59..a73fb9f 100755
--- a/configure
+++ b/configure
@@ -1132,11 +1132,11 @@ case "$cpu" in
            CPU_CFLAGS="-m64 -mcpu=ultrasparc"
            ;;
     s390)
-           CPU_CFLAGS="-m31 -march=z990"
+           CPU_CFLAGS="-m31"
            LDFLAGS="-m31 $LDFLAGS"
            ;;
     s390x)
-           CPU_CFLAGS="-m64 -march=z990"
+           CPU_CFLAGS="-m64"
            LDFLAGS="-m64 $LDFLAGS"
            ;;
     i386)
-- 
1.9.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH 9/9] tcg-s390: Don't force -march=z990
  2014-05-03 14:08 ` [Qemu-devel] [PATCH 9/9] tcg-s390: Don't force -march=z990 Richard Henderson
@ 2014-05-05  7:42   ` Christian Borntraeger
  0 siblings, 0 replies; 11+ messages in thread
From: Christian Borntraeger @ 2014-05-05  7:42 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel

On 03/05/14 16:08, Richard Henderson wrote:
> While we still require the LONG DISPLACEMENT facility, defaults
> have moved on since then.  Don't override the system compiler,
> whose default may be set to z9-109 or later.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  configure | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/configure b/configure
> index 2fbec59..a73fb9f 100755
> --- a/configure
> +++ b/configure
> @@ -1132,11 +1132,11 @@ case "$cpu" in
>             CPU_CFLAGS="-m64 -mcpu=ultrasparc"
>             ;;
>      s390)
> -           CPU_CFLAGS="-m31 -march=z990"
> +           CPU_CFLAGS="-m31"
>             LDFLAGS="-m31 $LDFLAGS"
>             ;;
>      s390x)
> -           CPU_CFLAGS="-m64 -march=z990"
> +           CPU_CFLAGS="-m64"
>             LDFLAGS="-m64 $LDFLAGS"
>             ;;
>      i386)
> 

Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
for this patch.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2014-05-05  7:43 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-05-03 14:08 [Qemu-devel] [PATCH 0/9] tcg/s390 improvements Richard Henderson
2014-05-03 14:08 ` [Qemu-devel] [PATCH 1/9] tcg-s390: Convert to TCGMemOp Richard Henderson
2014-05-03 14:08 ` [Qemu-devel] [PATCH 2/9] tcg-s390: Integrate endianness into TCGMemOp Richard Henderson
2014-05-03 14:08 ` [Qemu-devel] [PATCH 3/9] tcg-s390: Convert to new ldst opcodes Richard Henderson
2014-05-03 14:08 ` [Qemu-devel] [PATCH 4/9] tcg-s390: Move ldst helpers out of line Richard Henderson
2014-05-03 14:08 ` [Qemu-devel] [PATCH 5/9] tcg-s390: Use more risbg in the tlb sequence Richard Henderson
2014-05-03 14:08 ` [Qemu-devel] [PATCH 6/9] tcg-s390: Implement tcg_register_jit Richard Henderson
2014-05-03 14:08 ` [Qemu-devel] [PATCH 7/9] tcg-s390: Allow immediate operands to add2 and sub2 Richard Henderson
2014-05-03 14:08 ` [Qemu-devel] [PATCH 8/9] tcg-s390: Improve setcond Richard Henderson
2014-05-03 14:08 ` [Qemu-devel] [PATCH 9/9] tcg-s390: Don't force -march=z990 Richard Henderson
2014-05-05  7:42   ` Christian Borntraeger

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.