All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org
Subject: [Qemu-devel] [PULL 01/65] tcg: Add field extraction primitives
Date: Tue, 10 Jan 2017 18:17:16 -0800	[thread overview]
Message-ID: <20170111021820.24416-2-rth@twiddle.net> (raw)
In-Reply-To: <20170111021820.24416-1-rth@twiddle.net>

Adds tcg_gen_extract_* and tcg_gen_sextract_* for extraction of
fixed position bitfields, much like we already have for deposit.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/README               |  20 ++-
 tcg/aarch64/tcg-target.h |   4 +
 tcg/arm/tcg-target.h     |   2 +
 tcg/i386/tcg-target.h    |   4 +
 tcg/ia64/tcg-target.h    |   4 +
 tcg/mips/tcg-target.h    |   2 +
 tcg/optimize.c           |  29 +++++
 tcg/ppc/tcg-target.h     |   4 +
 tcg/s390/tcg-target.h    |   4 +
 tcg/sparc/tcg-target.h   |   4 +
 tcg/tcg-op.c             | 323 +++++++++++++++++++++++++++++++++++++++++++++++
 tcg/tcg-op.h             |  12 ++
 tcg/tcg-opc.h            |   4 +
 tcg/tcg.h                |   8 ++
 tcg/tci/tcg-target.h     |   4 +
 15 files changed, 426 insertions(+), 2 deletions(-)

diff --git a/tcg/README b/tcg/README
index ae31388..065d9c2 100644
--- a/tcg/README
+++ b/tcg/README
@@ -314,11 +314,27 @@ The bitfield is described by POS/LEN, which are immediate values:
   LEN - the length of the bitfield
   POS - the position of the first bit, counting from the LSB
 
-For example, pos=8, len=4 indicates a 4-bit field at bit 8.
-This operation would be equivalent to
+For example, "deposit_i32 dest, t1, t2, 8, 4" indicates a 4-bit field
+at bit 8.  This operation would be equivalent to
 
   dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00)
 
+* extract_i32/i64 dest, t1, pos, len
+* sextract_i32/i64 dest, t1, pos, len
+
+Extract a bitfield from T1, placing the result in DEST.
+The bitfield is described by POS/LEN, which are immediate values,
+as above for deposit.  For extract_*, the result will be extended
+to the left with zeros; for sextract_*, the result will be extended
+to the left with copies of the bitfield sign bit at pos + len - 1.
+
+For example, "sextract_i32 dest, t1, 8, 4" indicates a 4-bit field
+at bit 8.  This operation would be equivalent to
+
+  dest = (t1 << 20) >> 28
+
+(using an arithmetic right shift).
+
 * extrl_i64_i32 t0, t1
 
 For 64-bit hosts only, extract the low 32-bits of input T1 and place it
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index a1d101f..410c31b 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -63,6 +63,8 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      1
+#define TCG_TARGET_HAS_extract_i32      0
+#define TCG_TARGET_HAS_sextract_i32     0
 #define TCG_TARGET_HAS_movcond_i32      1
 #define TCG_TARGET_HAS_add2_i32         1
 #define TCG_TARGET_HAS_sub2_i32         1
@@ -93,6 +95,8 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      1
+#define TCG_TARGET_HAS_extract_i64      0
+#define TCG_TARGET_HAS_sextract_i64     0
 #define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_add2_i64         1
 #define TCG_TARGET_HAS_sub2_i64         1
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index a0e1acf..8e724be 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -80,6 +80,8 @@ extern bool use_idiv_instructions;
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      1
+#define TCG_TARGET_HAS_extract_i32      0
+#define TCG_TARGET_HAS_sextract_i32     0
 #define TCG_TARGET_HAS_movcond_i32      1
 #define TCG_TARGET_HAS_mulu2_i32        1
 #define TCG_TARGET_HAS_muls2_i32        1
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 524cfc6..7625188 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -94,6 +94,8 @@ extern bool have_bmi1;
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      1
+#define TCG_TARGET_HAS_extract_i32      0
+#define TCG_TARGET_HAS_sextract_i32     0
 #define TCG_TARGET_HAS_movcond_i32      1
 #define TCG_TARGET_HAS_add2_i32         1
 #define TCG_TARGET_HAS_sub2_i32         1
@@ -124,6 +126,8 @@ extern bool have_bmi1;
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      1
+#define TCG_TARGET_HAS_extract_i64      0
+#define TCG_TARGET_HAS_sextract_i64     0
 #define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_add2_i64         1
 #define TCG_TARGET_HAS_sub2_i64         1
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 6dddb7f..8856dc8 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -149,6 +149,10 @@ typedef enum {
 #define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_deposit_i32      1
 #define TCG_TARGET_HAS_deposit_i64      1
+#define TCG_TARGET_HAS_extract_i32      0
+#define TCG_TARGET_HAS_extract_i64      0
+#define TCG_TARGET_HAS_sextract_i32     0
+#define TCG_TARGET_HAS_sextract_i64     0
 #define TCG_TARGET_HAS_add2_i32         0
 #define TCG_TARGET_HAS_add2_i64         0
 #define TCG_TARGET_HAS_sub2_i32         0
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index d352c97..fcc2986 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -158,6 +158,8 @@ extern bool use_mips32r2_instructions;
 #define TCG_TARGET_HAS_movcond_i32      use_movnz_instructions
 #define TCG_TARGET_HAS_bswap16_i32      use_mips32r2_instructions
 #define TCG_TARGET_HAS_deposit_i32      use_mips32r2_instructions
+#define TCG_TARGET_HAS_extract_i32      0
+#define TCG_TARGET_HAS_sextract_i32     0
 #define TCG_TARGET_HAS_ext8s_i32        use_mips32r2_instructions
 #define TCG_TARGET_HAS_ext16s_i32       use_mips32r2_instructions
 #define TCG_TARGET_HAS_rot_i32          use_mips32r2_instructions
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 0f13490..f41ed2c 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -878,6 +878,19 @@ void tcg_optimize(TCGContext *s)
                              temps[args[2]].mask);
             break;
 
+        CASE_OP_32_64(extract):
+            mask = extract64(temps[args[1]].mask, args[2], args[3]);
+            if (args[2] == 0) {
+                affected = temps[args[1]].mask & ~mask;
+            }
+            break;
+        CASE_OP_32_64(sextract):
+            mask = sextract64(temps[args[1]].mask, args[2], args[3]);
+            if (args[2] == 0 && (tcg_target_long)mask >= 0) {
+                affected = temps[args[1]].mask & ~mask;
+            }
+            break;
+
         CASE_OP_32_64(or):
         CASE_OP_32_64(xor):
             mask = temps[args[1]].mask | temps[args[2]].mask;
@@ -1048,6 +1061,22 @@ void tcg_optimize(TCGContext *s)
             }
             goto do_default;
 
+        CASE_OP_32_64(extract):
+            if (temp_is_const(args[1])) {
+                tmp = extract64(temps[args[1]].val, args[2], args[3]);
+                tcg_opt_gen_movi(s, op, args, args[0], tmp);
+                break;
+            }
+            goto do_default;
+
+        CASE_OP_32_64(sextract):
+            if (temp_is_const(args[1])) {
+                tmp = sextract64(temps[args[1]].val, args[2], args[3]);
+                tcg_opt_gen_movi(s, op, args, args[0], tmp);
+                break;
+            }
+            goto do_default;
+
         CASE_OP_32_64(setcond):
             tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
             if (tmp != 2) {
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index dd032f2..c765d3e 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -69,6 +69,8 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i32         1
 #define TCG_TARGET_HAS_nor_i32          1
 #define TCG_TARGET_HAS_deposit_i32      1
+#define TCG_TARGET_HAS_extract_i32      0
+#define TCG_TARGET_HAS_sextract_i32     0
 #define TCG_TARGET_HAS_movcond_i32      1
 #define TCG_TARGET_HAS_mulu2_i32        0
 #define TCG_TARGET_HAS_muls2_i32        0
@@ -100,6 +102,8 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i64         1
 #define TCG_TARGET_HAS_nor_i64          1
 #define TCG_TARGET_HAS_deposit_i64      1
+#define TCG_TARGET_HAS_extract_i64      0
+#define TCG_TARGET_HAS_sextract_i64     0
 #define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_add2_i64         1
 #define TCG_TARGET_HAS_sub2_i64         1
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 0c1af24..9583df4 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -66,6 +66,8 @@ typedef enum TCGReg {
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      1
+#define TCG_TARGET_HAS_extract_i32      0
+#define TCG_TARGET_HAS_sextract_i32     0
 #define TCG_TARGET_HAS_movcond_i32      1
 #define TCG_TARGET_HAS_add2_i32         1
 #define TCG_TARGET_HAS_sub2_i32         1
@@ -95,6 +97,8 @@ typedef enum TCGReg {
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      1
+#define TCG_TARGET_HAS_extract_i64      0
+#define TCG_TARGET_HAS_sextract_i64     0
 #define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_add2_i64         1
 #define TCG_TARGET_HAS_sub2_i64         1
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 88f9c90..a212167 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -111,6 +111,8 @@ extern bool use_vis3_instructions;
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      0
+#define TCG_TARGET_HAS_extract_i32      0
+#define TCG_TARGET_HAS_sextract_i32     0
 #define TCG_TARGET_HAS_movcond_i32      1
 #define TCG_TARGET_HAS_add2_i32         1
 #define TCG_TARGET_HAS_sub2_i32         1
@@ -141,6 +143,8 @@ extern bool use_vis3_instructions;
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      0
+#define TCG_TARGET_HAS_extract_i64      0
+#define TCG_TARGET_HAS_sextract_i64     0
 #define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_add2_i64         1
 #define TCG_TARGET_HAS_sub2_i64         1
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 6e2fb35..c185b9c 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -560,6 +560,131 @@ void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
     tcg_temp_free_i32(t1);
 }
 
+void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
+                         unsigned int ofs, unsigned int len)
+{   /* RET = the LEN-bit field of ARG starting at bit OFS, zero-extended.  */
+    tcg_debug_assert(ofs < 32);
+    tcg_debug_assert(len > 0);
+    tcg_debug_assert(len <= 32);
+    tcg_debug_assert(ofs + len <= 32);
+
+    /* Canonicalize certain special cases, even if extract is supported.  */
+    if (ofs + len == 32) {  /* Field ends at bit 31: one right shift.  */
+        tcg_gen_shri_i32(ret, arg, 32 - len);
+        return;
+    }
+    if (ofs == 0) {  /* Field starts at bit 0: one mask; len < 32 here.  */
+        tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
+        return;
+    }
+
+    if (TCG_TARGET_HAS_extract_i32
+        && TCG_TARGET_extract_i32_valid(ofs, len)) {
+        tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len);
+        return;
+    }
+
+    /* Assume that zero-extension, if available, is cheaper than a shift.  */
+    switch (ofs + len) {  /* Zero-extend up to the field top, then shift.  */
+    case 16:
+        if (TCG_TARGET_HAS_ext16u_i32) {
+            tcg_gen_ext16u_i32(ret, arg);
+            tcg_gen_shri_i32(ret, ret, ofs);
+            return;
+        }
+        break;
+    case 8:
+        if (TCG_TARGET_HAS_ext8u_i32) {
+            tcg_gen_ext8u_i32(ret, arg);
+            tcg_gen_shri_i32(ret, ret, ofs);
+            return;
+        }
+        break;
+    }
+
+    /* ??? Ideally we'd know what values are available for immediate AND.
+       Assume that 8 bits are available, plus the special case of 16,
+       so that we get ext8u, ext16u.  */
+    switch (len) {
+    case 1 ... 8: case 16:  /* Shift the field to bit 0, then mask it.  */
+        tcg_gen_shri_i32(ret, arg, ofs);
+        tcg_gen_andi_i32(ret, ret, (1u << len) - 1);
+        break;
+    default:  /* Left-justify the field, then shift it down to bit 0.  */
+        tcg_gen_shli_i32(ret, arg, 32 - len - ofs);
+        tcg_gen_shri_i32(ret, ret, 32 - len);
+        break;
+    }
+}
+
+void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
+                          unsigned int ofs, unsigned int len)
+{   /* RET = the LEN-bit field of ARG starting at bit OFS, sign-extended.  */
+    tcg_debug_assert(ofs < 32);
+    tcg_debug_assert(len > 0);
+    tcg_debug_assert(len <= 32);
+    tcg_debug_assert(ofs + len <= 32);
+
+    /* Canonicalize certain special cases, even if sextract is supported.  */
+    if (ofs + len == 32) {  /* Field ends at bit 31: one sari suffices.  */
+        tcg_gen_sari_i32(ret, arg, 32 - len);
+        return;
+    }
+    if (ofs == 0) {  /* Only len 16 and 8 are special-cased here.  */
+        switch (len) {
+        case 16:
+            tcg_gen_ext16s_i32(ret, arg);
+            return;
+        case 8:
+            tcg_gen_ext8s_i32(ret, arg);
+            return;
+        }
+    }
+
+    if (TCG_TARGET_HAS_sextract_i32
+        && TCG_TARGET_extract_i32_valid(ofs, len)) {
+        tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len);
+        return;
+    }
+
+    /* Assume that sign-extension, if available, is cheaper than a shift.  */
+    switch (ofs + len) {  /* Sign-extend from the field top, then shift.  */
+    case 16:
+        if (TCG_TARGET_HAS_ext16s_i32) {
+            tcg_gen_ext16s_i32(ret, arg);
+            tcg_gen_sari_i32(ret, ret, ofs);
+            return;
+        }
+        break;
+    case 8:
+        if (TCG_TARGET_HAS_ext8s_i32) {
+            tcg_gen_ext8s_i32(ret, arg);
+            tcg_gen_sari_i32(ret, ret, ofs);
+            return;
+        }
+        break;
+    }
+    switch (len) {  /* Else shift to bit 0, then sign-extend LEN bits.  */
+    case 16:
+        if (TCG_TARGET_HAS_ext16s_i32) {
+            tcg_gen_shri_i32(ret, arg, ofs);
+            tcg_gen_ext16s_i32(ret, ret);
+            return;
+        }
+        break;
+    case 8:
+        if (TCG_TARGET_HAS_ext8s_i32) {
+            tcg_gen_shri_i32(ret, arg, ofs);
+            tcg_gen_ext8s_i32(ret, ret);
+            return;
+        }
+        break;
+    }
+
+    tcg_gen_shli_i32(ret, arg, 32 - len - ofs);  /* left-justify field */
+    tcg_gen_sari_i32(ret, ret, 32 - len);  /* shift down, sign-extending */
+}
+
 void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1,
                          TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2)
 {
@@ -1635,6 +1760,204 @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
     tcg_temp_free_i64(t1);
 }
 
+void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
+                         unsigned int ofs, unsigned int len)
+{   /* RET = the LEN-bit field of ARG starting at bit OFS, zero-extended.  */
+    tcg_debug_assert(ofs < 64);
+    tcg_debug_assert(len > 0);
+    tcg_debug_assert(len <= 64);
+    tcg_debug_assert(ofs + len <= 64);
+
+    /* Canonicalize certain special cases, even if extract is supported.  */
+    if (ofs + len == 64) {  /* Field ends at bit 63: one right shift.  */
+        tcg_gen_shri_i64(ret, arg, 64 - len);
+        return;
+    }
+    if (ofs == 0) {  /* Field starts at bit 0: one mask; len < 64 here.  */
+        tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
+        return;
+    }
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* Look for a 32-bit extract within one of the two words.  */
+        if (ofs >= 32) {  /* Field lies entirely in the high word.  */
+            tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
+            tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+            return;
+        }
+        if (ofs + len <= 32) {  /* Field lies entirely in the low word.  */
+            tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
+            tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+            return;
+        }
+        /* The field is split across two words.  One double-word
+           shift is better than two double-word shifts.  */
+        goto do_shift_and;
+    }
+
+    if (TCG_TARGET_HAS_extract_i64
+        && TCG_TARGET_extract_i64_valid(ofs, len)) {
+        tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len);
+        return;
+    }
+
+    /* Assume that zero-extension, if available, is cheaper than a shift.  */
+    switch (ofs + len) {  /* Zero-extend up to the field top, then shift.  */
+    case 32:
+        if (TCG_TARGET_HAS_ext32u_i64) {
+            tcg_gen_ext32u_i64(ret, arg);
+            tcg_gen_shri_i64(ret, ret, ofs);
+            return;
+        }
+        break;
+    case 16:
+        if (TCG_TARGET_HAS_ext16u_i64) {
+            tcg_gen_ext16u_i64(ret, arg);
+            tcg_gen_shri_i64(ret, ret, ofs);
+            return;
+        }
+        break;
+    case 8:
+        if (TCG_TARGET_HAS_ext8u_i64) {
+            tcg_gen_ext8u_i64(ret, arg);
+            tcg_gen_shri_i64(ret, ret, ofs);
+            return;
+        }
+        break;
+    }
+
+    /* ??? Ideally we'd know what values are available for immediate AND.
+       Assume that 8 bits are available, plus the special cases of 16 and 32,
+       so that we get ext8u, ext16u, and ext32u.  */
+    switch (len) {
+    case 1 ... 8: case 16: case 32:
+    do_shift_and:  /* Also reached by goto for a split field (32-bit host).  */
+        tcg_gen_shri_i64(ret, arg, ofs);
+        tcg_gen_andi_i64(ret, ret, (1ull << len) - 1);
+        break;
+    default:  /* Left-justify the field, then shift it down to bit 0.  */
+        tcg_gen_shli_i64(ret, arg, 64 - len - ofs);
+        tcg_gen_shri_i64(ret, ret, 64 - len);
+        break;
+    }
+}
+
+void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
+                          unsigned int ofs, unsigned int len)
+{   /* RET = the LEN-bit field of ARG starting at bit OFS, sign-extended.  */
+    tcg_debug_assert(ofs < 64);
+    tcg_debug_assert(len > 0);
+    tcg_debug_assert(len <= 64);
+    tcg_debug_assert(ofs + len <= 64);
+
+    /* Canonicalize certain special cases, even if sextract is supported.  */
+    if (ofs + len == 64) {  /* Field ends at bit 63: one sari suffices.  */
+        tcg_gen_sari_i64(ret, arg, 64 - len);
+        return;
+    }
+    if (ofs == 0) {  /* Only len 32, 16 and 8 are special-cased here.  */
+        switch (len) {
+        case 32:
+            tcg_gen_ext32s_i64(ret, arg);
+            return;
+        case 16:
+            tcg_gen_ext16s_i64(ret, arg);
+            return;
+        case 8:
+            tcg_gen_ext8s_i64(ret, arg);
+            return;
+        }
+    }
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* Look for a 32-bit extract within one of the two words.  */
+        if (ofs >= 32) {
+            tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
+        } else if (ofs + len <= 32) {
+            tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
+        } else if (ofs == 0) {  /* Here len > 32: low word is kept as is.  */
+            tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+            tcg_gen_sextract_i32(TCGV_HIGH(ret), TCGV_HIGH(arg), 0, len - 32);
+            return;
+        } else if (len > 32) {  /* 0 < ofs < 32 and ofs + len < 64 here.  */
+            TCGv_i32 t = tcg_temp_new_i32();
+            /* High word: LEN - 32 bits at OFS of HIGH(arg).  */
+            tcg_gen_sextract_i32(t, TCGV_HIGH(arg), ofs, len - 32);
+            /* Shift the field down for the low part.  */
+            tcg_gen_shri_i64(ret, arg, ofs);
+            /* Overwrite the shift into the high part.  */
+            tcg_gen_mov_i32(TCGV_HIGH(ret), t);
+            tcg_temp_free_i32(t);
+            return;
+        } else {
+            /* Shift the field down for the low part, such that the
+               field sits at the MSB.  */
+            tcg_gen_shri_i64(ret, arg, ofs + len - 32);
+            /* Shift the field down from the MSB, sign extending.  */
+            tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_LOW(ret), 32 - len);
+        }
+        /* Sign-extend the field from 32 bits.  */
+        tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+        return;
+    }
+
+    if (TCG_TARGET_HAS_sextract_i64
+        && TCG_TARGET_extract_i64_valid(ofs, len)) {
+        tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len);
+        return;
+    }
+
+    /* Assume that sign-extension, if available, is cheaper than a shift.  */
+    switch (ofs + len) {  /* Sign-extend from the field top, then shift.  */
+    case 32:
+        if (TCG_TARGET_HAS_ext32s_i64) {
+            tcg_gen_ext32s_i64(ret, arg);
+            tcg_gen_sari_i64(ret, ret, ofs);
+            return;
+        }
+        break;
+    case 16:
+        if (TCG_TARGET_HAS_ext16s_i64) {
+            tcg_gen_ext16s_i64(ret, arg);
+            tcg_gen_sari_i64(ret, ret, ofs);
+            return;
+        }
+        break;
+    case 8:
+        if (TCG_TARGET_HAS_ext8s_i64) {
+            tcg_gen_ext8s_i64(ret, arg);
+            tcg_gen_sari_i64(ret, ret, ofs);
+            return;
+        }
+        break;
+    }
+    switch (len) {  /* Else shift to bit 0, then sign-extend LEN bits.  */
+    case 32:
+        if (TCG_TARGET_HAS_ext32s_i64) {
+            tcg_gen_shri_i64(ret, arg, ofs);
+            tcg_gen_ext32s_i64(ret, ret);
+            return;
+        }
+        break;
+    case 16:
+        if (TCG_TARGET_HAS_ext16s_i64) {
+            tcg_gen_shri_i64(ret, arg, ofs);
+            tcg_gen_ext16s_i64(ret, ret);
+            return;
+        }
+        break;
+    case 8:
+        if (TCG_TARGET_HAS_ext8s_i64) {
+            tcg_gen_shri_i64(ret, arg, ofs);
+            tcg_gen_ext8s_i64(ret, ret);
+            return;
+        }
+        break;
+    }
+    tcg_gen_shli_i64(ret, arg, 64 - len - ofs);  /* left-justify field */
+    tcg_gen_sari_i64(ret, ret, 64 - len);  /* shift down, sign-extending */
+}
+
 void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
                          TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2)
 {
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 6d044b7..b515e6f 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -292,6 +292,10 @@ void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
 void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
 void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
                          unsigned int ofs, unsigned int len);
+void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
+                         unsigned int ofs, unsigned int len);
+void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
+                          unsigned int ofs, unsigned int len);
 void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *);
 void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *);
 void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
@@ -469,6 +473,10 @@ void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
 void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
 void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
                          unsigned int ofs, unsigned int len);
+void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
+                         unsigned int ofs, unsigned int len);
+void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
+                          unsigned int ofs, unsigned int len);
 void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *);
 void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *);
 void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
@@ -951,6 +959,8 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
 #define tcg_gen_rotr_tl tcg_gen_rotr_i64
 #define tcg_gen_rotri_tl tcg_gen_rotri_i64
 #define tcg_gen_deposit_tl tcg_gen_deposit_i64
+#define tcg_gen_extract_tl tcg_gen_extract_i64
+#define tcg_gen_sextract_tl tcg_gen_sextract_i64
 #define tcg_const_tl tcg_const_i64
 #define tcg_const_local_tl tcg_const_local_i64
 #define tcg_gen_movcond_tl tcg_gen_movcond_i64
@@ -1039,6 +1049,8 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
 #define tcg_gen_rotr_tl tcg_gen_rotr_i32
 #define tcg_gen_rotri_tl tcg_gen_rotri_i32
 #define tcg_gen_deposit_tl tcg_gen_deposit_i32
+#define tcg_gen_extract_tl tcg_gen_extract_i32
+#define tcg_gen_sextract_tl tcg_gen_sextract_i32
 #define tcg_const_tl tcg_const_i32
 #define tcg_const_local_tl tcg_const_local_i32
 #define tcg_gen_movcond_tl tcg_gen_movcond_i32
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 45528d2..11563ac 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -77,6 +77,8 @@ DEF(sar_i32, 1, 2, 0, 0)
 DEF(rotl_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32))
 DEF(rotr_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32))
 DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32))
+DEF(extract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_extract_i32))
+DEF(sextract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_sextract_i32))
 
 DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END)
 
@@ -139,6 +141,8 @@ DEF(sar_i64, 1, 2, 0, IMPL64)
 DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
 DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
 DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64))
+DEF(extract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_extract_i64))
+DEF(sextract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_sextract_i64))
 
 /* size changing ops */
 DEF(ext_i32_i64, 1, 1, 0, IMPL64)
diff --git a/tcg/tcg.h b/tcg/tcg.h
index a35e4c4..5fd3733 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -112,6 +112,8 @@ typedef uint64_t TCGRegSet;
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      0
+#define TCG_TARGET_HAS_extract_i64      0
+#define TCG_TARGET_HAS_sextract_i64     0
 #define TCG_TARGET_HAS_movcond_i64      0
 #define TCG_TARGET_HAS_add2_i64         0
 #define TCG_TARGET_HAS_sub2_i64         0
@@ -130,6 +132,12 @@ typedef uint64_t TCGRegSet;
 #ifndef TCG_TARGET_deposit_i64_valid
 #define TCG_TARGET_deposit_i64_valid(ofs, len) 1
 #endif
+#ifndef TCG_TARGET_extract_i32_valid
+#define TCG_TARGET_extract_i32_valid(ofs, len) 1
+#endif
+#ifndef TCG_TARGET_extract_i64_valid
+#define TCG_TARGET_extract_i64_valid(ofs, len) 1
+#endif
 
 /* Only one of DIV or DIV2 should be defined.  */
 #if defined(TCG_TARGET_HAS_div_i32)
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 868228b..2065042 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -69,6 +69,8 @@
 #define TCG_TARGET_HAS_ext16u_i32       1
 #define TCG_TARGET_HAS_andc_i32         0
 #define TCG_TARGET_HAS_deposit_i32      1
+#define TCG_TARGET_HAS_extract_i32      0
+#define TCG_TARGET_HAS_sextract_i32     0
 #define TCG_TARGET_HAS_eqv_i32          0
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
@@ -88,6 +90,8 @@
 #define TCG_TARGET_HAS_bswap32_i64      1
 #define TCG_TARGET_HAS_bswap64_i64      1
 #define TCG_TARGET_HAS_deposit_i64      1
+#define TCG_TARGET_HAS_extract_i64      0
+#define TCG_TARGET_HAS_sextract_i64     0
 #define TCG_TARGET_HAS_div_i64          0
 #define TCG_TARGET_HAS_rem_i64          0
 #define TCG_TARGET_HAS_ext8s_i64        1
-- 
2.9.3

  reply	other threads:[~2017-01-11  2:18 UTC|newest]

Thread overview: 73+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-11  2:17 [Qemu-devel] [PULL 00/65] tcg 2.9 patch queue Richard Henderson
2017-01-11  2:17 ` Richard Henderson [this message]
2017-01-11  2:17 ` [Qemu-devel] [PULL 02/65] tcg: Minor adjustments to deposit expanders Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 03/65] tcg: Add deposit_z expander Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 04/65] tcg/aarch64: Implement field extraction opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 05/65] tcg/arm: Move isa detection to tcg-target.h Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 06/65] tcg/arm: Implement field extraction opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 07/65] tcg/i386: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 08/65] tcg/mips: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 09/65] tcg/ppc: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 10/65] tcg/s390: Expose host facilities to tcg-target.h Richard Henderson
2017-01-13  9:18   ` Christian Borntraeger
2017-01-16  8:28     ` Christian Borntraeger
2017-01-11  2:17 ` [Qemu-devel] [PULL 11/65] tcg/s390: Implement field extraction opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 12/65] tcg/s390: Support deposit into zero Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 13/65] target-alpha: Use deposit and extract ops Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 14/65] target-arm: Use new " Richard Henderson
2017-01-14 19:41   ` Laszlo Ersek
2017-01-14 20:13     ` Richard Henderson
2017-01-16 23:05       ` Laszlo Ersek
2017-01-11  2:17 ` [Qemu-devel] [PULL 15/65] target-i386: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 16/65] target-mips: Use the new extract op Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 17/65] target-ppc: Use the new deposit and extract ops Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 18/65] target-s390x: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 19/65] tcg/optimize: Fold movcond 0/1 into setcond Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 20/65] tcg: Add markup for output requires new register Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 21/65] tcg: Transition flat op_defs array to a target callback Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 22/65] tcg: Pass the opcode width to target_parse_constraint Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 23/65] tcg: Allow an operand to be matching or a constant Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 24/65] tcg: Add clz and ctz opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 25/65] disas/i386.c: Handle tzcnt Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 26/65] disas/ppc: Handle popcnt and cnttz Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 27/65] target-alpha: Use the ctz and clz opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 28/65] target-cris: Use clz opcode Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 29/65] target-microblaze: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 30/65] target-mips: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 31/65] target-openrisc: Use clz and ctz opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 32/65] target-ppc: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 33/65] target-s390x: Use clz opcode Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 34/65] target-tilegx: Use clz and ctz opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 35/65] target-tricore: Use clz opcode Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 36/65] target-unicore32: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 37/65] target-xtensa: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 38/65] target-arm: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 39/65] target-i386: Use clz and ctz opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 40/65] tcg/ppc: Handle ctz and clz opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 41/65] tcg/aarch64: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 42/65] tcg/arm: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 43/65] tcg/mips: Handle clz opcode Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 44/65] tcg/s390: " Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 45/65] tcg/i386: Fuly convert tcg_target_op_def Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 46/65] tcg/i386: Hoist common arguments in tcg_out_op Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 47/65] tcg/i386: Allow bmi2 shiftx to have non-matching operands Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 48/65] tcg/i386: Handle ctz and clz opcodes Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 49/65] tcg/i386: Rely on undefined/undocumented behaviour of BSF/BSR Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 50/65] tcg: Add helpers for clrsb Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 51/65] target-arm: Use clrsb helper Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 52/65] target-tricore: " Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 53/65] target-xtensa: " Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 54/65] tcg: Add opcode for ctpop Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 55/65] target-alpha: Use ctpop helper Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 56/65] target-ppc: " Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 57/65] target-s390x: Avoid a loop for popcnt Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 58/65] target-sparc: Use ctpop helper Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 59/65] target-tilegx: " Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 60/65] target-i386: " Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 61/65] qemu/host-utils.h: Reduce the operation count in the fallback ctpop Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 62/65] tests: New test-bitcnt Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 63/65] tcg: Use ctpop to generate ctz if needed Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 64/65] tcg/ppc: Handle ctpop opcode Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 65/65] tcg/i386: " Richard Henderson
2017-01-11  3:39 ` [Qemu-devel] [PULL 00/65] tcg 2.9 patch queue no-reply
2017-01-12 15:57 ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170111021820.24416-2-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.