All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org
Subject: [Qemu-devel] [PULL 63/65] tcg: Use ctpop to generate ctz if needed
Date: Tue, 10 Jan 2017 18:18:18 -0800	[thread overview]
Message-ID: <20170111021820.24416-64-rth@twiddle.net> (raw)
In-Reply-To: <20170111021820.24416-1-rth@twiddle.net>

Particularly when andc is also available, this is two insns
shorter than using clz to compute ctz.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/tcg-op.c | 100 +++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 60 insertions(+), 40 deletions(-)

diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 6f4b1b6..95a39b7 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -497,33 +497,27 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
         tcg_gen_extrl_i64_i32(ret, t1);
         tcg_temp_free_i64(t1);
         tcg_temp_free_i64(t2);
-    } else if (TCG_TARGET_HAS_clz_i32) {
-        TCGv_i32 t1 = tcg_temp_new_i32();
-        TCGv_i32 t2 = tcg_temp_new_i32();
-        tcg_gen_neg_i32(t1, arg1);
-        tcg_gen_xori_i32(t2, arg2, 31);
-        tcg_gen_and_i32(t1, t1, arg1);
-        tcg_gen_clz_i32(ret, t1, t2);
-        tcg_temp_free_i32(t1);
-        tcg_temp_free_i32(t2);
-        tcg_gen_xori_i32(ret, ret, 31);
-    } else if (TCG_TARGET_HAS_clz_i64) {
-        TCGv_i32 t1 = tcg_temp_new_i32();
-        TCGv_i32 t2 = tcg_temp_new_i32();
-        TCGv_i64 x1 = tcg_temp_new_i64();
-        TCGv_i64 x2 = tcg_temp_new_i64();
-        tcg_gen_neg_i32(t1, arg1);
-        tcg_gen_xori_i32(t2, arg2, 63);
-        tcg_gen_and_i32(t1, t1, arg1);
-        tcg_gen_extu_i32_i64(x1, t1);
-        tcg_gen_extu_i32_i64(x2, t2);
-        tcg_temp_free_i32(t1);
-        tcg_temp_free_i32(t2);
-        tcg_gen_clz_i64(x1, x1, x2);
-        tcg_gen_extrl_i64_i32(ret, x1);
-        tcg_temp_free_i64(x1);
-        tcg_temp_free_i64(x2);
-        tcg_gen_xori_i32(ret, ret, 63);
+    } else if (TCG_TARGET_HAS_ctpop_i32
+               || TCG_TARGET_HAS_ctpop_i64
+               || TCG_TARGET_HAS_clz_i32
+               || TCG_TARGET_HAS_clz_i64) {
+        TCGv_i32 z, t = tcg_temp_new_i32();
+
+        if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64) {
+            tcg_gen_subi_i32(t, arg1, 1);
+            tcg_gen_andc_i32(t, t, arg1);
+            tcg_gen_ctpop_i32(t, t);
+        } else {
+            /* Since all non-x86 hosts have clz(0) == 32, don't fight it.  */
+            tcg_gen_neg_i32(t, arg1);
+            tcg_gen_and_i32(t, t, arg1);
+            tcg_gen_clzi_i32(t, t, 32);
+            tcg_gen_xori_i32(t, t, 31);
+        }
+        z = tcg_const_i32(0);
+        tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t);
+        tcg_temp_free_i32(t);
+        tcg_temp_free_i32(z);
     } else {
         gen_helper_ctz_i32(ret, arg1, arg2);
     }
@@ -531,9 +525,18 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
 
 void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
 {
-    TCGv_i32 t = tcg_const_i32(arg2);
-    tcg_gen_ctz_i32(ret, arg1, t);
-    tcg_temp_free_i32(t);
+    if (!TCG_TARGET_HAS_ctz_i32 && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) {
+        /* This equivalence has the advantage of not requiring a fixup.  */
+        TCGv_i32 t = tcg_temp_new_i32();
+        tcg_gen_subi_i32(t, arg1, 1);
+        tcg_gen_andc_i32(t, t, arg1);
+        tcg_gen_ctpop_i32(ret, t);
+        tcg_temp_free_i32(t);
+    } else {
+        TCGv_i32 t = tcg_const_i32(arg2);
+        tcg_gen_ctz_i32(ret, arg1, t);
+        tcg_temp_free_i32(t);
+    }
 }
 
 void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg)
@@ -1842,16 +1845,24 @@ void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
     if (TCG_TARGET_HAS_ctz_i64) {
         tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2);
-    } else if (TCG_TARGET_HAS_clz_i64) {
-        TCGv_i64 t1 = tcg_temp_new_i64();
-        TCGv_i64 t2 = tcg_temp_new_i64();
-        tcg_gen_neg_i64(t1, arg1);
-        tcg_gen_xori_i64(t2, arg2, 63);
-        tcg_gen_and_i64(t1, t1, arg1);
-        tcg_gen_clz_i64(ret, t1, t2);
-        tcg_temp_free_i64(t1);
-        tcg_temp_free_i64(t2);
-        tcg_gen_xori_i64(ret, ret, 63);
+    } else if (TCG_TARGET_HAS_ctpop_i64 || TCG_TARGET_HAS_clz_i64) {
+        TCGv_i64 z, t = tcg_temp_new_i64();
+
+        if (TCG_TARGET_HAS_ctpop_i64) {
+            tcg_gen_subi_i64(t, arg1, 1);
+            tcg_gen_andc_i64(t, t, arg1);
+            tcg_gen_ctpop_i64(t, t);
+        } else {
+            /* Since all non-x86 hosts have clz(0) == 64, don't fight it.  */
+            tcg_gen_neg_i64(t, arg1);
+            tcg_gen_and_i64(t, t, arg1);
+            tcg_gen_clzi_i64(t, t, 64);
+            tcg_gen_xori_i64(t, t, 63);
+        }
+        z = tcg_const_i64(0);
+        tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t);
+        tcg_temp_free_i64(t);
+        tcg_temp_free_i64(z);
     } else {
         gen_helper_ctz_i64(ret, arg1, arg2);
     }
@@ -1868,6 +1879,15 @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
         tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32);
         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
         tcg_temp_free_i32(t32);
+    } else if (!TCG_TARGET_HAS_ctz_i64
+               && TCG_TARGET_HAS_ctpop_i64
+               && arg2 == 64) {
+        /* This equivalence has the advantage of not requiring a fixup.  */
+        TCGv_i64 t = tcg_temp_new_i64();
+        tcg_gen_subi_i64(t, arg1, 1);
+        tcg_gen_andc_i64(t, t, arg1);
+        tcg_gen_ctpop_i64(ret, t);
+        tcg_temp_free_i64(t);
     } else {
         TCGv_i64 t64 = tcg_const_i64(arg2);
         tcg_gen_ctz_i64(ret, arg1, t64);
-- 
2.9.3

  parent reply	other threads:[~2017-01-11  2:19 UTC|newest]

Thread overview: 73+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-11  2:17 [Qemu-devel] [PULL 00/65] tcg 2.9 patch queue Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 01/65] tcg: Add field extraction primitives Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 02/65] tcg: Minor adjustments to deposit expanders Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 03/65] tcg: Add deposit_z expander Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 04/65] tcg/aarch64: Implement field extraction opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 05/65] tcg/arm: Move isa detection to tcg-target.h Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 06/65] tcg/arm: Implement field extraction opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 07/65] tcg/i386: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 08/65] tcg/mips: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 09/65] tcg/ppc: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 10/65] tcg/s390: Expose host facilities to tcg-target.h Richard Henderson
2017-01-13  9:18   ` Christian Borntraeger
2017-01-16  8:28     ` Christian Borntraeger
2017-01-11  2:17 ` [Qemu-devel] [PULL 11/65] tcg/s390: Implement field extraction opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 12/65] tcg/s390: Support deposit into zero Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 13/65] target-alpha: Use deposit and extract ops Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 14/65] target-arm: Use new " Richard Henderson
2017-01-14 19:41   ` Laszlo Ersek
2017-01-14 20:13     ` Richard Henderson
2017-01-16 23:05       ` Laszlo Ersek
2017-01-11  2:17 ` [Qemu-devel] [PULL 15/65] target-i386: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 16/65] target-mips: Use the new extract op Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 17/65] target-ppc: Use the new deposit and extract ops Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 18/65] target-s390x: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 19/65] tcg/optimize: Fold movcond 0/1 into setcond Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 20/65] tcg: Add markup for output requires new register Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 21/65] tcg: Transition flat op_defs array to a target callback Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 22/65] tcg: Pass the opcode width to target_parse_constraint Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 23/65] tcg: Allow an operand to be matching or a constant Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 24/65] tcg: Add clz and ctz opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 25/65] disas/i386.c: Handle tzcnt Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 26/65] disas/ppc: Handle popcnt and cnttz Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 27/65] target-alpha: Use the ctz and clz opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 28/65] target-cris: Use clz opcode Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 29/65] target-microblaze: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 30/65] target-mips: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 31/65] target-openrisc: Use clz and ctz opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 32/65] target-ppc: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 33/65] target-s390x: Use clz opcode Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 34/65] target-tilegx: Use clz and ctz opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 35/65] target-tricore: Use clz opcode Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 36/65] target-unicore32: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 37/65] target-xtensa: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 38/65] target-arm: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 39/65] target-i386: Use clz and ctz opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 40/65] tcg/ppc: Handle ctz and clz opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 41/65] tcg/aarch64: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 42/65] tcg/arm: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 43/65] tcg/mips: Handle clz opcode Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 44/65] tcg/s390: " Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 45/65] tcg/i386: Fuly convert tcg_target_op_def Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 46/65] tcg/i386: Hoist common arguments in tcg_out_op Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 47/65] tcg/i386: Allow bmi2 shiftx to have non-matching operands Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 48/65] tcg/i386: Handle ctz and clz opcodes Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 49/65] tcg/i386: Rely on undefined/undocumented behaviour of BSF/BSR Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 50/65] tcg: Add helpers for clrsb Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 51/65] target-arm: Use clrsb helper Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 52/65] target-tricore: " Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 53/65] target-xtensa: " Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 54/65] tcg: Add opcode for ctpop Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 55/65] target-alpha: Use ctpop helper Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 56/65] target-ppc: " Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 57/65] target-s390x: Avoid a loop for popcnt Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 58/65] target-sparc: Use ctpop helper Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 59/65] target-tilegx: " Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 60/65] target-i386: " Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 61/65] qemu/host-utils.h: Reduce the operation count in the fallback ctpop Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 62/65] tests: New test-bitcnt Richard Henderson
2017-01-11  2:18 ` Richard Henderson [this message]
2017-01-11  2:18 ` [Qemu-devel] [PULL 64/65] tcg/ppc: Handle ctpop opcode Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 65/65] tcg/i386: " Richard Henderson
2017-01-11  3:39 ` [Qemu-devel] [PULL 00/65] tcg 2.9 patch queue no-reply
2017-01-12 15:57 ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170111021820.24416-64-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.