From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PATCH 44/48] tcg/optimize: Optimize sign extensions
Date: Sun, 29 Aug 2021 23:24:47 -0700
Message-ID: <20210830062451.639572-45-richard.henderson@linaro.org>
In-Reply-To: <20210830062451.639572-1-richard.henderson@linaro.org>

Certain targets, such as riscv, keep 32-bit results sign-extended
in 64-bit registers.  This can lead to many redundant sign
extensions as values are manipulated.

Begin by tracking only the obvious sign extensions, and convert
them to simple copies when possible.
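
To make the new s_mask representation concrete, here is a minimal
standalone sketch (not part of the patch) of the two helpers added
below.  It substitutes GCC/Clang builtins for QEMU's clrsb64() and
clz64(), handling zero explicitly since __builtin_clzll(0) is
undefined:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Left-aligned mask of the leading redundant sign bits of value. */
    static uint64_t smask_from_value(uint64_t value)
    {
        int rep = __builtin_clrsbll((long long)value);
        return ~(~0ull >> rep);
    }

    /* Same, derived from a known-zeros mask: a field with N leading
       known-zero bits is an unsigned field, so N-1 bits are known
       copies of the (zero) sign bit. */
    static uint64_t smask_from_zmask(uint64_t zmask)
    {
        int rep = zmask ? __builtin_clzll(zmask) : 64;
        return rep ? ~(~0ull >> (rep - 1)) : 0;
    }

    int main(void)
    {
        /* -1: bits 63..1 repeat the sign bit -> fffffffffffffffe. */
        printf("%016" PRIx64 "\n", smask_from_value(-1ull));
        /* An 8-bit unsigned field: bits 63..9 are sign copies ->
           fffffffffffffe00, matching MAKE_64BIT_MASK(9, 55) for ld8u. */
        printf("%016" PRIx64 "\n", smask_from_zmask(0xff));
        return 0;
    }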

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 129 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 105 insertions(+), 24 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 334639339b..9a752fbe29 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -42,6 +42,7 @@ typedef struct TempOptInfo {
     TCGTemp *next_copy;
     uint64_t val;
     uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
+    uint64_t s_mask;  /* a left-aligned mask of clrsb(value) bits. */
 } TempOptInfo;
 
 typedef struct OptContext {
@@ -52,9 +53,37 @@ typedef struct OptContext {
     /* In flight values from optimization. */
     uint64_t a_mask;  /* mask bit is 0 iff value identical to first input */
     uint64_t z_mask;  /* mask bit is 0 iff value bit is 0 */
+    uint64_t s_mask;  /* mask of clrsb(value) bits */
     TCGType type;
 } OptContext;
 
+/* Calculate the smask for a specific value. */
+static uint64_t smask_from_value(uint64_t value)
+{
+    int rep = clrsb64(value);
+    return ~(~0ull >> rep);
+}
+
+/*
+ * Calculate the smask for a given set of known-zeros.
+ * If there are lots of zeros on the left, we can consider the remainder
+ * an unsigned field, and thus the corresponding signed field is one bit
+ * larger.
+ */
+static uint64_t smask_from_zmask(uint64_t zmask)
+{
+    /*
+     * Only the 0 bits are significant for zmask, thus the msb itself
+     * must be zero, else we have no sign information.
+     */
+    int rep = clz64(zmask);
+    if (rep == 0) {
+        return 0;
+    }
+    rep -= 1;
+    return ~(~0ull >> rep);
+}
+
 static inline TempOptInfo *ts_info(TCGTemp *ts)
 {
     return ts->state_ptr;
@@ -93,6 +122,7 @@ static void reset_ts(TCGTemp *ts)
     ti->prev_copy = ts;
     ti->is_const = false;
     ti->z_mask = -1;
+    ti->s_mask = 0;
 }
 
 static void reset_temp(TCGArg arg)
@@ -123,9 +153,11 @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
         ti->is_const = true;
         ti->val = ts->val;
         ti->z_mask = ts->val;
+        ti->s_mask = smask_from_value(ts->val);
     } else {
         ti->is_const = false;
         ti->z_mask = -1;
+        ti->s_mask = 0;
     }
 }
 
@@ -219,6 +251,7 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
     op->args[1] = src;
 
     di->z_mask = si->z_mask;
+    di->s_mask = si->s_mask;
 
     if (src_ts->type == dst_ts->type) {
         TempOptInfo *ni = ts_info(si->next_copy);
@@ -644,13 +677,15 @@ static void finish_folding(OptContext *ctx, TCGOp *op)
 
     nb_oargs = def->nb_oargs;
     for (i = 0; i < nb_oargs; i++) {
-        reset_temp(op->args[i]);
+        TCGTemp *ts = arg_temp(op->args[i]);
+        reset_ts(ts);
         /*
-         * Save the corresponding known-zero bits mask for the
+         * Save the corresponding known-zero/sign bits mask for the
          * first output argument (only one supported so far).
          */
         if (i == 0) {
-            arg_info(op->args[i])->z_mask = ctx->z_mask;
+            ts_info(ts)->z_mask = ctx->z_mask;
+            ts_info(ts)->s_mask = ctx->s_mask;
         }
     }
 }
@@ -700,6 +735,7 @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
 {
     uint64_t a_mask = ctx->a_mask;
     uint64_t z_mask = ctx->z_mask;
+    uint64_t s_mask = ctx->s_mask;
 
     /*
      * 32-bit ops generate 32-bit results, which for the purpose of
@@ -711,7 +747,9 @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
     if (ctx->type == TCG_TYPE_I32) {
         a_mask = (int32_t)a_mask;
         z_mask = (int32_t)z_mask;
+        s_mask = (int32_t)s_mask;
         ctx->z_mask = z_mask;
+        ctx->s_mask = s_mask;
     }
 
     if (z_mask == 0) {
@@ -1059,7 +1097,7 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
 
 static bool fold_bswap(OptContext *ctx, TCGOp *op)
 {
-    uint64_t z_mask, sign;
+    uint64_t z_mask, s_mask, sign;
 
     if (arg_is_const(op->args[1])) {
         uint64_t t = arg_info(op->args[1])->val;
@@ -1069,6 +1107,7 @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
     }
 
     z_mask = arg_info(op->args[1])->z_mask;
+
     switch (op->opc) {
     case INDEX_op_bswap16_i32:
     case INDEX_op_bswap16_i64:
@@ -1087,6 +1126,7 @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
     default:
         g_assert_not_reached();
     }
+    s_mask = smask_from_zmask(z_mask);
 
     switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
     case TCG_BSWAP_OZ:
@@ -1095,14 +1135,17 @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
         /* If the sign bit may be 1, force all the bits above to 1. */
         if (z_mask & sign) {
             z_mask |= sign;
+            s_mask = sign << 1;
         }
         break;
     default:
         /* The high bits are undefined: force all bits above the sign to 1. */
         z_mask |= sign << 1;
+        s_mask = 0;
         break;
     }
     ctx->z_mask = z_mask;
+    ctx->s_mask = s_mask;
 
     return fold_masks(ctx, op);
 }
@@ -1241,21 +1284,24 @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
 static bool fold_extract(OptContext *ctx, TCGOp *op)
 {
     uint64_t z_mask_old, z_mask;
+    int pos = op->args[2];
+    int len = op->args[3];
 
     if (arg_is_const(op->args[1])) {
         uint64_t t;
 
         t = arg_info(op->args[1])->val;
-        t = extract64(t, op->args[2], op->args[3]);
+        t = extract64(t, pos, len);
         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
     }
 
     z_mask_old = arg_info(op->args[1])->z_mask;
-    z_mask = sextract64(z_mask_old, op->args[2], op->args[3]);
-    if (op->args[2] == 0) {
+    z_mask = extract64(z_mask_old, pos, len);
+    if (pos == 0) {
         ctx->a_mask = z_mask_old ^ z_mask;
     }
     ctx->z_mask = z_mask;
+    ctx->s_mask = smask_from_zmask(z_mask);
 
     return fold_masks(ctx, op);
 }
@@ -1281,14 +1327,16 @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
 
 static bool fold_exts(OptContext *ctx, TCGOp *op)
 {
-    uint64_t z_mask_old, z_mask, sign;
+    uint64_t s_mask_old, s_mask, z_mask, sign;
     bool type_change = false;
 
     if (fold_const1(ctx, op)) {
         return true;
     }
 
-    z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
+    z_mask = arg_info(op->args[1])->z_mask;
+    s_mask = arg_info(op->args[1])->s_mask;
+    s_mask_old = s_mask;
 
     switch (op->opc) {
     CASE_OP_32_64(ext8s):
@@ -1312,10 +1360,14 @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
 
     if (z_mask & sign) {
         z_mask |= sign;
-    } else if (!type_change) {
-        ctx->a_mask = z_mask_old ^ z_mask;
     }
+    s_mask |= sign << 1;
+
     ctx->z_mask = z_mask;
+    ctx->s_mask = s_mask;
+    if (!type_change) {
+        ctx->a_mask = s_mask & ~s_mask_old;
+    }
 
     return fold_masks(ctx, op);
 }
@@ -1354,6 +1406,7 @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
     }
 
     ctx->z_mask = z_mask;
+    ctx->s_mask = smask_from_zmask(z_mask);
     if (!type_change) {
         ctx->a_mask = z_mask_old ^ z_mask;
     }
@@ -1566,8 +1619,12 @@ static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
     MemOp mop = get_memop(oi);
     int width = 8 << (mop & MO_SIZE);
 
-    if (!(mop & MO_SIGN) && width < 64) {
-        ctx->z_mask = MAKE_64BIT_MASK(0, width);
+    if (width < 64) {
+        ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
+        if (!(mop & MO_SIGN)) {
+            ctx->z_mask = MAKE_64BIT_MASK(0, width);
+            ctx->s_mask <<= 1;
+        }
     }
 
     /* Opcodes that touch guest memory stop the mb optimization.  */
@@ -1677,23 +1734,31 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
 
 static bool fold_sextract(OptContext *ctx, TCGOp *op)
 {
-    int64_t z_mask_old, z_mask;
+    uint64_t z_mask, s_mask, s_mask_old;
+    int pos = op->args[2];
+    int len = op->args[3];
 
     if (arg_is_const(op->args[1])) {
         uint64_t t;
 
         t = arg_info(op->args[1])->val;
-        t = sextract64(t, op->args[2], op->args[3]);
+        t = sextract64(t, pos, len);
         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
     }
 
-    z_mask_old = arg_info(op->args[1])->z_mask;
-    z_mask = sextract64(z_mask_old, op->args[2], op->args[3]);
-    if (op->args[2] == 0 && z_mask >= 0) {
-        ctx->a_mask = z_mask_old ^ z_mask;
-    }
+    z_mask = arg_info(op->args[1])->z_mask;
+    z_mask = sextract64(z_mask, pos, len);
     ctx->z_mask = z_mask;
 
+    s_mask_old = arg_info(op->args[1])->s_mask;
+    s_mask = sextract64(s_mask_old, pos, len);
+    s_mask |= MAKE_64BIT_MASK(len, 64 - len);
+    ctx->s_mask = s_mask;
+
+    if (pos == 0) {
+        ctx->a_mask = s_mask & ~s_mask_old;
+    }
+
     return fold_masks(ctx, op);
 }
 
@@ -1770,14 +1835,26 @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
 {
     /* We can't do any folding with a load, but we can record bits. */
     switch (op->opc) {
+    CASE_OP_32_64(ld8s):
+        ctx->s_mask = MAKE_64BIT_MASK(8, 56);
+        break;
     CASE_OP_32_64(ld8u):
-        ctx->z_mask = 0xff;
+        ctx->z_mask = MAKE_64BIT_MASK(0, 8);
+        ctx->s_mask = MAKE_64BIT_MASK(9, 55);
+        break;
+    CASE_OP_32_64(ld16s):
+        ctx->s_mask = MAKE_64BIT_MASK(16, 48);
         break;
     CASE_OP_32_64(ld16u):
-        ctx->z_mask = 0xffff;
+        ctx->z_mask = MAKE_64BIT_MASK(0, 16);
+        ctx->s_mask = MAKE_64BIT_MASK(17, 47);
+        break;
+    case INDEX_op_ld32s_i64:
+        ctx->s_mask = MAKE_64BIT_MASK(32, 32);
         break;
     case INDEX_op_ld32u_i64:
-        ctx->z_mask = 0xffffffffu;
+        ctx->z_mask = MAKE_64BIT_MASK(0, 32);
+        ctx->s_mask = MAKE_64BIT_MASK(33, 31);
         break;
     default:
         g_assert_not_reached();
@@ -1840,9 +1917,10 @@ void tcg_optimize(TCGContext *s)
             ctx.type = TCG_TYPE_I32;
         }
 
-        /* Assume all bits affected, and no bits known zero. */
+        /* Assume all bits affected, no bits known zero, no sign reps. */
         ctx.a_mask = -1;
         ctx.z_mask = -1;
+        ctx.s_mask = 0;
 
         /*
          * Process each opcode.
@@ -1915,8 +1993,11 @@ void tcg_optimize(TCGContext *s)
         case INDEX_op_extrh_i64_i32:
             done = fold_extu(&ctx, op);
             break;
+        CASE_OP_32_64(ld8s):
         CASE_OP_32_64(ld8u):
+        CASE_OP_32_64(ld16s):
         CASE_OP_32_64(ld16u):
+        case INDEX_op_ld32s_i64:
         case INDEX_op_ld32u_i64:
             done = fold_tcg_ld(&ctx, op);
             break;
-- 
2.25.1
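
To see why "s_mask & ~s_mask_old" detects a redundant extension,
consider fold_exts() applied to ext32s_i64.  The sketch below is
illustrative only (the helper name and driver are hypothetical, not
QEMU API, and the type_change check is omitted for brevity); it
mirrors the masking in fold_exts(), where sign is the sign-extended
sign-bit constant (INT32_MIN), so sign << 1 covers all bits above
bit 31:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Would ext32s_i64 change any bit of a value whose redundant
       sign bits are described by s_mask_old? */
    static bool ext32s_is_redundant(uint64_t s_mask_old)
    {
        uint64_t sign = -(1ull << 31);              /* INT32_MIN */
        uint64_t s_mask = s_mask_old | (sign << 1); /* bits the op sets */
        uint64_t a_mask = s_mask & ~s_mask_old;     /* newly affected bits */
        return a_mask == 0;  /* fold_masks() then emits a simple copy */
    }

    int main(void)
    {
        /* Output of a 32-bit signed op: bits 63..32 already sign copies. */
        printf("%d\n", ext32s_is_redundant(~(~0ull >> 32)));  /* 1 */
        /* Nothing known about the input: the extension is still needed. */
        printf("%d\n", ext32s_is_redundant(0));               /* 0 */
        return 0;
    }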




Thread overview: 63+ messages
2021-08-30  6:24 [PATCH 00/48] tcg: optimize redundant sign extensions Richard Henderson
2021-08-30  6:24 ` [PATCH 01/48] tcg/optimize: Rename "mask" to "z_mask" Richard Henderson
2021-08-30  9:15   ` Philippe Mathieu-Daudé
2021-08-30  6:24 ` [PATCH 02/48] tcg/optimize: Split out OptContext Richard Henderson
2021-08-30  6:24 ` [PATCH 03/48] tcg/optimize: Remove do_default label Richard Henderson
2021-08-30  6:24 ` [PATCH 04/48] tcg/optimize: Change tcg_opt_gen_{mov,movi} interface Richard Henderson
2021-08-30  6:24 ` [PATCH 05/48] tcg/optimize: Move prev_mb into OptContext Richard Henderson
2021-08-30  6:24 ` [PATCH 06/48] tcg/optimize: Split out init_arguments Richard Henderson
2021-08-30  9:17   ` Philippe Mathieu-Daudé
2021-08-30  6:24 ` [PATCH 07/48] tcg/optimize: Split out copy_propagate Richard Henderson
2021-08-30  9:18   ` Philippe Mathieu-Daudé
2021-08-30  6:24 ` [PATCH 08/48] tcg/optimize: Split out fold_call Richard Henderson
2021-08-30  6:24 ` [PATCH 09/48] tcg/optimize: Drop nb_oargs, nb_iargs locals Richard Henderson
2021-08-30  6:24 ` [PATCH 10/48] tcg/optimize: Change fail return for do_constant_folding_cond* Richard Henderson
2021-08-30  6:24 ` [PATCH 11/48] tcg/optimize: Return true from tcg_opt_gen_{mov,movi} Richard Henderson
2021-08-30  9:20   ` Philippe Mathieu-Daudé
2021-08-30  6:24 ` [PATCH 12/48] tcg/optimize: Split out finish_folding Richard Henderson
2021-08-30  6:24 ` [PATCH 13/48] tcg/optimize: Use a boolean to avoid a mass of continues Richard Henderson
2021-08-30  9:23   ` Philippe Mathieu-Daudé
2021-08-30  6:24 ` [PATCH 14/48] tcg/optimize: Split out fold_mb, fold_qemu_{ld,st} Richard Henderson
2021-08-30  6:24 ` [PATCH 15/48] tcg/optimize: Split out fold_const{1,2} Richard Henderson
2021-08-30  6:24 ` [PATCH 16/48] tcg/optimize: Split out fold_setcond2 Richard Henderson
2021-08-30  6:24 ` [PATCH 17/48] tcg/optimize: Split out fold_brcond2 Richard Henderson
2021-08-30  6:24 ` [PATCH 18/48] tcg/optimize: Split out fold_brcond Richard Henderson
2021-08-30  6:24 ` [PATCH 19/48] tcg/optimize: Split out fold_setcond Richard Henderson
2021-08-30  6:24 ` [PATCH 20/48] tcg/optimize: Split out fold_mulu2_i32 Richard Henderson
2021-08-30  6:24 ` [PATCH 21/48] tcg/optimize: Split out fold_addsub2_i32 Richard Henderson
2021-08-30  6:24 ` [PATCH 22/48] tcg/optimize: Split out fold_movcond Richard Henderson
2021-08-30  6:24 ` [PATCH 23/48] tcg/optimize: Split out fold_extract2 Richard Henderson
2021-08-30  6:24 ` [PATCH 24/48] tcg/optimize: Split out fold_extract, fold_sextract Richard Henderson
2021-08-30  6:24 ` [PATCH 25/48] tcg/optimize: Split out fold_deposit Richard Henderson
2021-08-30  6:24 ` [PATCH 26/48] tcg/optimize: Split out fold_count_zeros Richard Henderson
2021-08-30  6:24 ` [PATCH 27/48] tcg/optimize: Split out fold_bswap Richard Henderson
2021-08-30 21:17   ` Philippe Mathieu-Daudé
2021-08-30  6:24 ` [PATCH 28/48] tcg/optimize: Split out fold_dup, fold_dup2 Richard Henderson
2021-08-30  6:24 ` [PATCH 29/48] tcg/optimize: Split out fold_mov Richard Henderson
2021-08-30 21:57   ` Philippe Mathieu-Daudé
2021-08-30  6:24 ` [PATCH 30/48] tcg/optimize: Split out fold_xx_to_i Richard Henderson
2021-08-30 21:56   ` Philippe Mathieu-Daudé
2021-08-30  6:24 ` [PATCH 31/48] tcg/optimize: Split out fold_xx_to_x Richard Henderson
2021-08-30 21:55   ` Philippe Mathieu-Daudé
2021-08-30  6:24 ` [PATCH 32/48] tcg/optimize: Split out fold_xi_to_i Richard Henderson
2021-08-30 21:53   ` Philippe Mathieu-Daudé
2021-08-30  6:24 ` [PATCH 33/48] tcg/optimize: Add type to OptContext Richard Henderson
2021-08-30  6:24 ` [PATCH 34/48] tcg/optimize: Split out fold_to_not Richard Henderson
2021-08-30  6:24 ` [PATCH 35/48] tcg/optimize: Split out fold_sub_to_neg Richard Henderson
2021-08-30  6:24 ` [PATCH 36/48] tcg/optimize: Split out fold_xi_to_x Richard Henderson
2021-08-30  6:24 ` [PATCH 37/48] tcg/optimize: Split out fold_ix_to_i Richard Henderson
2021-08-30  6:24 ` [PATCH 38/48] tcg/optimize: Split out fold_masks Richard Henderson
2021-08-30  6:24 ` [PATCH 39/48] tcg/optimize: Expand fold_mulu2_i32 to all 4-arg multiplies Richard Henderson
2021-08-30  6:24 ` [PATCH 40/48] tcg/optimize: Expand fold_addsub2_i32 to 64-bit ops Richard Henderson
2021-08-30 21:47   ` Philippe Mathieu-Daudé
2021-08-30  6:24 ` [PATCH 41/48] tcg/optimize: Sink commutative operand swapping into fold functions Richard Henderson
2021-08-30  6:24 ` [PATCH 42/48] tcg/optimize: Add more simplifications for orc Richard Henderson
2021-08-30  6:24 ` [PATCH 43/48] tcg/optimize: Stop forcing z_mask to "garbage" for 32-bit values Richard Henderson
2021-08-30  6:24 ` [PATCH 44/48] tcg/optimize: Optimize sign extensions Richard Henderson [this message]
2021-08-30  6:24 ` [PATCH 45/48] tcg/optimize: Propagate sign info for logical operations Richard Henderson
2021-08-30 21:49   ` Philippe Mathieu-Daudé
2021-08-30  6:24 ` [PATCH 46/48] tcg/optimize: Propagate sign info for setcond Richard Henderson
2021-08-30  6:24 ` [PATCH 47/48] tcg/optimize: Propagate sign info for bit counting Richard Henderson
2021-08-30  6:24 ` [PATCH 48/48] tcg/optimize: Propagate sign info for shifting Richard Henderson
2021-08-30 22:00 ` [PATCH 00/48] tcg: optimize redundant sign extensions Philippe Mathieu-Daudé
2021-08-31  3:50   ` Richard Henderson
