All of lore.kernel.org
 help / color / mirror / Atom feed
From: matheus.ferst@eldorado.org.br
To: qemu-devel@nongnu.org, qemu-ppc@nongnu.org
Cc: danielhb413@gmail.com, richard.henderson@linaro.org,
	groug@kaod.org, clg@kaod.org,
	Matheus Ferst <matheus.ferst@eldorado.org.br>,
	david@gibson.dropbear.id.au
Subject: [PATCH v4 18/47] target/ppc: implement vgnb
Date: Tue, 22 Feb 2022 11:36:16 -0300	[thread overview]
Message-ID: <20220222143646.1268606-19-matheus.ferst@eldorado.org.br> (raw)
In-Reply-To: <20220222143646.1268606-1-matheus.ferst@eldorado.org.br>

From: Matheus Ferst <matheus.ferst@eldorado.org.br>

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
---
v4:
 - Optimized implementation (rth)
---
 target/ppc/insn32.decode            |   5 ++
 target/ppc/translate/vmx-impl.c.inc | 135 ++++++++++++++++++++++++++++
 2 files changed, 140 insertions(+)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 31a3c3b508..02df4a98e6 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -66,6 +66,9 @@
 &VX_mp          rt mp:bool vrb
 @VX_mp          ...... rt:5 .... mp:1 vrb:5 ...........         &VX_mp
 
+&VX_n           rt vrb n
+@VX_n           ...... rt:5 .. n:3 vrb:5 ...........            &VX_n
+
 &VX_tb_rc       vrt vrb rc:bool
 @VX_tb_rc       ...... vrt:5 ..... vrb:5 rc:1 ..........        &VX_tb_rc
 
@@ -418,6 +421,8 @@ VCMPUQ          000100 ... -- ..... ..... 00100000001   @VX_bf
 
 ## Vector Bit Manipulation Instruction
 
+VGNB            000100 ..... -- ... ..... 10011001100   @VX_n
+
 VCFUGED         000100 ..... ..... ..... 10101001101    @VX
 VCLZDM          000100 ..... ..... ..... 11110000100    @VX
 VCTZDM          000100 ..... ..... ..... 11111000100    @VX
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index 17fc25d1bd..19219b0010 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -1416,6 +1416,141 @@ GEN_VXFORM_DUAL(vsplth, PPC_ALTIVEC, PPC_NONE,
 GEN_VXFORM_DUAL(vspltw, PPC_ALTIVEC, PPC_NONE,
                 vextractuw, PPC_NONE, PPC2_ISA300);
 
+static bool trans_VGNB(DisasContext *ctx, arg_VX_n *a)
+{
+    /*
+     * VGNB: gather every Nth bit of VRB into RT. Similar to do_vextractm, we
+     * use a sequence of mask-shift-or operations; the masks can be created with
+     *
+     * uint64_t mask(uint64_t n, uint64_t step)
+     * {
+     *     uint64_t p = ((1UL << (1UL << step)) - 1UL) << ((n - 1UL) << step),
+     *                  plen = n << step, m = 0;
+     *     for(int i = 0; i < 64/plen; i++) {
+     *         m |= p;
+     *         m = ror64(m, plen);
+     *     }
+     *     p >>= plen * DIV_ROUND_UP(64, plen) - 64;
+     *     return m | p;
+     * }
+     *
+     * But since there are few values of N, we'll use a lookup table, indexed
+     * as mask[N - 2][step], to avoid these calculations at runtime.
+     */
+    static const uint64_t mask[6][5] = {
+        {
+            0xAAAAAAAAAAAAAAAAULL, 0xccccccccccccccccULL, 0xf0f0f0f0f0f0f0f0ULL,
+            0xff00ff00ff00ff00ULL, 0xffff0000ffff0000ULL
+        },
+        {
+            0x9249249249249249ULL, 0xC30C30C30C30C30CULL, 0xF00F00F00F00F00FULL,
+            0xFF0000FF0000FF00ULL, 0xFFFF00000000FFFFULL
+        },
+        {
+            /* For N >= 4, some mask operations can be elided (0 entries) */
+            0x8888888888888888ULL, 0, 0xf000f000f000f000ULL, 0,
+            0xFFFF000000000000ULL
+        },
+        {
+            0x8421084210842108ULL, 0, 0xF0000F0000F0000FULL, 0, 0
+        },
+        {
+            0x8208208208208208ULL, 0, 0xF00000F00000F000ULL, 0, 0
+        },
+        {
+            0x8102040810204081ULL, 0, 0xF000000F000000F0ULL, 0, 0
+        }
+    };
+    uint64_t m;
+    int i, sh, nbits = DIV_ROUND_UP(64, a->n);
+    TCGv_i64 hi, lo, t0, t1;
+
+    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+    REQUIRE_VECTOR(ctx);
+
+    if (a->n < 2) {
+        /*
+         * "N can be any value between 2 and 7, inclusive." Otherwise, the
+         * result is undefined, so we don't need to change RT. Also, N > 7 is
+         * impossible since the immediate field is 3 bits only.
+         */
+        return true;
+    }
+
+    hi = tcg_temp_new_i64();
+    lo = tcg_temp_new_i64();
+    t0 = tcg_temp_new_i64();
+    t1 = tcg_temp_new_i64();
+
+    get_avr64(hi, a->vrb, true);
+    get_avr64(lo, a->vrb, false);
+
+    /* Align the lower doubleword so we can use the same masks as for hi */
+    tcg_gen_shli_i64(lo, lo, a->n * nbits - 64);
+
+    /*
+     * Starting from the most significant bit, gather every Nth bit with a
+     * sequence of mask-shift-or operations. E.g.: for N=3
+     * AxxBxxCxxDxxExxFxxGxxHxxIxxJxxKxxLxxMxxNxxOxxPxxQxxRxxSxxTxxUxxV
+     *     & rep(0b100)
+     * A..B..C..D..E..F..G..H..I..J..K..L..M..N..O..P..Q..R..S..T..U..V
+     *     << 2
+     * .B..C..D..E..F..G..H..I..J..K..L..M..N..O..P..Q..R..S..T..U..V..
+     *     |
+     * AB.BC.CD.DE.EF.FG.GH.HI.IJ.JK.KL.LM.MN.NO.OP.PQ.QR.RS.ST.TU.UV.V
+     *     & rep(0b110000)
+     * AB....CD....EF....GH....IJ....KL....MN....OP....QR....ST....UV..
+     *     << 4
+     * ..CD....EF....GH....IJ....KL....MN....OP....QR....ST....UV......
+     *     |
+     * ABCD..CDEF..EFGH..GHIJ..IJKL..KLMN..MNOP..OPQR..QRST..STUV..UV..
+     *     & rep(0b111100000000)
+     * ABCD........EFGH........IJKL........MNOP........QRST........UV..
+     *     << 8
+     * ....EFGH........IJKL........MNOP........QRST........UV..........
+     *     |
+     * ABCDEFGH....EFGHIJKL....IJKLMNOP....MNOPQRST....QRSTUV......UV..
+     *     & rep(0b111111110000000000000000)
+     * ABCDEFGH................IJKLMNOP................QRSTUV..........
+     *     << 16
+     * ........IJKLMNOP................QRSTUV..........................
+     *     |
+     * ABCDEFGHIJKLMNOP........IJKLMNOPQRSTUV..........QRSTUV..........
+     *     & rep(0b111111111111111100000000000000000000000000000000)
+     * ABCDEFGHIJKLMNOP................................QRSTUV..........
+     *     << 32
+     * ................QRSTUV..........................................
+     *     |
+     * ABCDEFGHIJKLMNOPQRSTUV..........................QRSTUV..........
+     */
+    for (i = 0, sh = a->n - 1; i < 5; i++, sh <<= 1) {
+        m = mask[a->n - 2][i];
+        if (m) { /* a zero entry means this mask step is elided */
+            tcg_gen_andi_i64(hi, hi, m);
+            tcg_gen_andi_i64(lo, lo, m);
+        }
+        if (sh < 64) { /* sh reaches 16 * (N - 1), i.e. >= 64 for N >= 5 */
+            tcg_gen_shli_i64(t0, hi, sh);
+            tcg_gen_shli_i64(t1, lo, sh);
+            tcg_gen_or_i64(hi, t0, hi);
+            tcg_gen_or_i64(lo, t1, lo);
+        }
+    }
+
+    tcg_gen_andi_i64(hi, hi, ~(~0ULL >> nbits)); /* keep top nbits bits */
+    tcg_gen_andi_i64(lo, lo, ~(~0ULL >> nbits));
+    tcg_gen_shri_i64(lo, lo, nbits); /* place lo's bits after hi's */
+    tcg_gen_or_i64(hi, hi, lo);
+    tcg_gen_trunc_i64_tl(cpu_gpr[a->rt], hi); /* result goes to GPR[RT] */
+
+    tcg_temp_free_i64(hi);
+    tcg_temp_free_i64(lo);
+    tcg_temp_free_i64(t0);
+    tcg_temp_free_i64(t1);
+
+    return true;
+}
+
 static bool do_vextdx(DisasContext *ctx, arg_VA *a, int size, bool right,
                void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv))
 {
-- 
2.25.1



  parent reply	other threads:[~2022-02-22 16:58 UTC|newest]

Thread overview: 97+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-22 14:35 [PATCH v4 00/47] target/ppc: PowerISA Vector/VSX instruction batch matheus.ferst
2022-02-22 14:35 ` [PATCH v4 01/47] target/ppc: Introduce TRANS*FLAGS macros matheus.ferst
2022-02-22 14:36 ` [PATCH v4 02/47] target/ppc: moved vector even and odd multiplication to decodetree matheus.ferst
2022-02-22 18:19   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 03/47] target/ppc: Moved vector multiply high and low " matheus.ferst
2022-02-22 18:19   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 04/47] target/ppc: vmulh* instructions without helpers matheus.ferst
2022-02-22 18:23   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 05/47] target/ppc: Implement vmsumcud instruction matheus.ferst
2022-02-22 18:28   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 06/47] target/ppc: Implement vmsumudm instruction matheus.ferst
2022-02-22 14:36 ` [PATCH v4 07/47] target/ppc: Move vexts[bhw]2[wd] to decodetree matheus.ferst
2022-02-22 18:34   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 08/47] target/ppc: Implement vextsd2q matheus.ferst
2022-02-22 14:36 ` [PATCH v4 09/47] target/ppc: Move Vector Compare Equal/Not Equal/Greater Than to decodetree matheus.ferst
2022-02-22 18:37   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 10/47] target/ppc: Move Vector Compare Not Equal or Zero " matheus.ferst
2022-02-22 19:04   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 11/47] target/ppc: Implement Vector Compare Equal Quadword matheus.ferst
2022-02-22 19:05   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 12/47] target/ppc: Implement Vector Compare Greater Than Quadword matheus.ferst
2022-02-22 19:07   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 13/47] target/ppc: Implement Vector Compare Quadword matheus.ferst
2022-02-22 14:36 ` [PATCH v4 14/47] target/ppc: implement vstri[bh][lr] matheus.ferst
2022-02-22 19:13   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 15/47] target/ppc: implement vclrlb matheus.ferst
2022-02-22 19:15   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 16/47] target/ppc: implement vclrrb matheus.ferst
2022-02-22 19:17   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 17/47] target/ppc: implement vcntmb[bhwd] matheus.ferst
2022-02-22 14:36 ` matheus.ferst [this message]
2022-02-22 21:58   ` [PATCH v4 18/47] target/ppc: implement vgnb Richard Henderson
2022-02-22 14:36 ` [PATCH v4 19/47] target/ppc: move vs[lr][a][bhwd] to decodetree matheus.ferst
2022-02-22 22:01   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 20/47] target/ppc: implement vslq matheus.ferst
2022-02-22 22:14   ` Richard Henderson
2022-02-23 21:53     ` Matheus K. Ferst
2022-02-23 22:12       ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 21/47] target/ppc: implement vsrq matheus.ferst
2022-02-22 22:15   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 22/47] target/ppc: implement vsraq matheus.ferst
2022-02-22 22:19   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 23/47] target/ppc: move vrl[bhwd] to decodetree matheus.ferst
2022-02-22 22:20   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 24/47] target/ppc: move vrl[bhwd]nm/vrl[bhwd]mi " matheus.ferst
2022-02-22 22:30   ` Richard Henderson
2022-02-23 21:43     ` Matheus K. Ferst
2022-02-23 22:19       ` Richard Henderson
2022-02-24 20:23         ` Matheus K. Ferst
2022-02-24 21:26           ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 25/47] target/ppc: implement vrlq matheus.ferst
2022-02-22 22:33   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 26/47] target/ppc: Move vsel and vperm/vpermr to decodetree matheus.ferst
2022-02-22 22:37   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 27/47] target/ppc: Move xxsel " matheus.ferst
2022-02-22 22:38   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 28/47] target/ppc: move xxperm/xxpermr " matheus.ferst
2022-02-22 22:40   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 29/47] target/ppc: Move xxpermdi " matheus.ferst
2022-02-22 22:42   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 30/47] target/ppc: Implement xxpermx instruction matheus.ferst
2022-02-22 22:46   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 31/47] tcg/tcg-op-gvec.c: Introduce tcg_gen_gvec_4i matheus.ferst
2022-02-22 23:04   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 32/47] target/ppc: Implement xxeval matheus.ferst
2022-02-22 23:43   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 33/47] target/ppc: Implement xxgenpcv[bhwd]m instruction matheus.ferst
2022-02-22 23:48   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 34/47] target/ppc: move xs[n]madd[am][ds]p/xs[n]msub[am][ds]p to decodetree matheus.ferst
2022-02-22 23:52   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 35/47] target/ppc: implement xs[n]maddqp[o]/xs[n]msubqp[o] matheus.ferst
2022-02-22 23:56   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 36/47] target/ppc: Implement xvtlsbb instruction matheus.ferst
2022-02-23  0:07   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 37/47] target/ppc: Remove xscmpnedp instruction matheus.ferst
2022-02-22 14:36 ` [PATCH v4 38/47] target/ppc: Refactor VSX_SCALAR_CMP_DP matheus.ferst
2022-02-23  0:20   ` Richard Henderson
2022-02-24 19:16     ` Víctor Colombo
2022-02-24 21:24       ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 39/47] target/ppc: Implement xscmp{eq,ge,gt}qp matheus.ferst
2022-02-23  0:21   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 40/47] target/ppc: Move xscmp{eq,ge,gt}dp to decodetree matheus.ferst
2022-02-23  0:22   ` [PATCH v4 40/47] target/ppc: Move xscmp{eq, ge, gt}dp " Richard Henderson
2022-02-22 14:36 ` [PATCH v4 41/47] target/ppc: Move xs{max, min}[cj]dp to use do_helper_XX3 matheus.ferst
2022-02-23  0:23   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 42/47] target/ppc: Refactor VSX_MAX_MINC helper matheus.ferst
2022-02-23  0:40   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 43/47] target/ppc: Implement xs{max,min}cqp matheus.ferst
2022-02-23  0:41   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 44/47] target/ppc: Implement xvcvbf16spn and xvcvspbf16 instructions matheus.ferst
2022-02-23  3:08   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 45/47] target/ppc: implement plxsd/pstxsd matheus.ferst
2022-02-23  3:14   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 46/47] target/ppc: implement plxssp/pstxssp matheus.ferst
2022-02-23  3:16   ` Richard Henderson
2022-02-22 14:36 ` [PATCH v4 47/47] target/ppc: implement lxvr[bhwd]/stxvr[bhwd]x matheus.ferst
2022-02-23  3:23   ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220222143646.1268606-19-matheus.ferst@eldorado.org.br \
    --to=matheus.ferst@eldorado.org.br \
    --cc=clg@kaod.org \
    --cc=danielhb413@gmail.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=groug@kaod.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.