All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PULL 064/114] target/arm: Implement SVE2 HISTCNT, HISTSEG
Date: Tue, 25 May 2021 16:02:34 +0100	[thread overview]
Message-ID: <20210525150324.32370-65-peter.maydell@linaro.org> (raw)
In-Reply-To: <20210525150324.32370-1-peter.maydell@linaro.org>

From: Stephen Long <steplong@quicinc.com>

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Stephen Long <steplong@quicinc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210525010358.152808-43-richard.henderson@linaro.org
Message-Id: <20200416173109.8856-1-steplong@quicinc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/helper-sve.h    |   7 ++
 target/arm/sve.decode      |   6 ++
 target/arm/sve_helper.c    | 131 +++++++++++++++++++++++++++++++++++++
 target/arm/translate-sve.c |  19 ++++++
 4 files changed, 163 insertions(+)

diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 98e6b57e386..507a2fea8e4 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -2551,6 +2551,13 @@ DEF_HELPER_FLAGS_5(sve2_nmatch_ppzz_b, TCG_CALL_NO_RWG,
 DEF_HELPER_FLAGS_5(sve2_nmatch_ppzz_h, TCG_CALL_NO_RWG,
                    i32, ptr, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_5(sve2_histcnt_s, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_histcnt_d, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_histseg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_h, TCG_CALL_NO_RWG,
                    void, ptr, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_s, TCG_CALL_NO_RWG,
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 388bf92acfe..8f501a083c9 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -146,6 +146,7 @@
                 &rprrr_esz rn=%reg_movprfx
 @rdn_pg_rm_ra   ........ esz:2 . ra:5  ... pg:3 rm:5 rd:5 \
                 &rprrr_esz rn=%reg_movprfx
+@rd_pg_rn_rm   ........ esz:2 . rm:5 ... pg:3 rn:5 rd:5       &rprr_esz
 
 # One register operand, with governing predicate, vector element size
 @rd_pg_rn       ........ esz:2 ... ... ... pg:3 rn:5 rd:5       &rpr_esz
@@ -1336,6 +1337,11 @@ RSUBHNT         01000101 .. 1 ..... 011 111 ..... .....  @rd_rn_rm
 MATCH           01000101 .. 1 ..... 100 ... ..... 0 .... @pd_pg_rn_rm
 NMATCH          01000101 .. 1 ..... 100 ... ..... 1 .... @pd_pg_rn_rm
 
+### SVE2 Histogram Computation
+
+HISTCNT         01000101 .. 1 ..... 110 ... ..... .....  @rd_pg_rn_rm
+HISTSEG         01000101 .. 1 ..... 101 000 ..... .....  @rd_rn_rm
+
 ## SVE2 floating-point pairwise operations
 
 FADDP           01100100 .. 010 00 0 100 ... ..... ..... @rdn_pg_rm
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 891f6ff453f..662ed80b1c4 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -7071,3 +7071,134 @@ DO_PPZZ_MATCH(sve2_nmatch_ppzz_b, MO_8, true)
 DO_PPZZ_MATCH(sve2_nmatch_ppzz_h, MO_16, true)
 
 #undef DO_PPZZ_MATCH
+
+void HELPER(sve2_histcnt_s)(void *vd, void *vn, void *vm, void *vg,
+                            uint32_t desc)
+{
+    ARMVectorReg scratch;
+    intptr_t i, j;
+    intptr_t opr_sz = simd_oprsz(desc);
+    uint32_t *d = vd, *n = vn, *m = vm;
+    uint8_t *pg = vg;
+
+    if (d == n) {
+        n = memcpy(&scratch, n, opr_sz);
+        if (d == m) {
+            m = n;
+        }
+    } else if (d == m) {
+        m = memcpy(&scratch, m, opr_sz);
+    }
+
+    for (i = 0; i < opr_sz; i += 4) {
+        uint64_t count = 0;
+        uint8_t pred;
+
+        pred = pg[H1(i >> 3)] >> (i & 7);
+        if (pred & 1) {
+            uint32_t nn = n[H4(i >> 2)];
+
+            for (j = 0; j <= i; j += 4) {
+                pred = pg[H1(j >> 3)] >> (j & 7);
+                if ((pred & 1) && nn == m[H4(j >> 2)]) {
+                    ++count;
+                }
+            }
+        }
+        d[H4(i >> 2)] = count;
+    }
+}
+
+void HELPER(sve2_histcnt_d)(void *vd, void *vn, void *vm, void *vg,
+                            uint32_t desc)
+{
+    ARMVectorReg scratch;
+    intptr_t i, j;
+    intptr_t opr_sz = simd_oprsz(desc);
+    uint64_t *d = vd, *n = vn, *m = vm;
+    uint8_t *pg = vg;
+
+    if (d == n) {
+        n = memcpy(&scratch, n, opr_sz);
+        if (d == m) {
+            m = n;
+        }
+    } else if (d == m) {
+        m = memcpy(&scratch, m, opr_sz);
+    }
+
+    for (i = 0; i < opr_sz / 8; ++i) {
+        uint64_t count = 0;
+        if (pg[H1(i)] & 1) {
+            uint64_t nn = n[i];
+            for (j = 0; j <= i; ++j) {
+                if ((pg[H1(j)] & 1) && nn == m[j]) {
+                    ++count;
+                }
+            }
+        }
+        d[i] = count;
+    }
+}
+
+/*
+ * Returns the number of bytes in m0 and m1 that match n.
+ * Unlike do_match2 we don't just need true/false, we need an exact count.
+ * This requires two extra logical operations.
+ */
+static inline uint64_t do_histseg_cnt(uint8_t n, uint64_t m0, uint64_t m1)
+{
+    const uint64_t mask = dup_const(MO_8, 0x7f);
+    uint64_t cmp0, cmp1;
+
+    cmp1 = dup_const(MO_8, n);
+    cmp0 = cmp1 ^ m0;
+    cmp1 = cmp1 ^ m1;
+
+    /*
+     * 1: clear msb of each byte to avoid carry to next byte (& mask)
+     * 2: carry in to msb if byte != 0 (+ mask)
+     * 3: set msb if cmp has msb set (| cmp)
+     * 4: set ~msb to ignore them (| mask)
+     * We now have 0xff for byte != 0 or 0x7f for byte == 0.
+     * 5: invert, resulting in 0x80 if and only if byte == 0.
+     */
+    cmp0 = ~(((cmp0 & mask) + mask) | cmp0 | mask);
+    cmp1 = ~(((cmp1 & mask) + mask) | cmp1 | mask);
+
+    /*
+     * Combine the two compares in a way that the bits do
+     * not overlap, and so preserves the count of set bits.
+     * If the host has an efficient instruction for ctpop,
+     * then ctpop(x) + ctpop(y) has the same number of
+     * operations as ctpop(x | (y >> 1)).  If the host does
+     * not have an efficient ctpop, then we only want to
+     * use it once.
+     */
+    return ctpop64(cmp0 | (cmp1 >> 1));
+}
+
+void HELPER(sve2_histseg)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, j;
+    intptr_t opr_sz = simd_oprsz(desc);
+
+    for (i = 0; i < opr_sz; i += 16) {
+        uint64_t n0 = *(uint64_t *)(vn + i);
+        uint64_t m0 = *(uint64_t *)(vm + i);
+        uint64_t n1 = *(uint64_t *)(vn + i + 8);
+        uint64_t m1 = *(uint64_t *)(vm + i + 8);
+        uint64_t out0 = 0;
+        uint64_t out1 = 0;
+
+        for (j = 0; j < 64; j += 8) {
+            uint64_t cnt0 = do_histseg_cnt(n0 >> j, m0, m1);
+            uint64_t cnt1 = do_histseg_cnt(n1 >> j, m0, m1);
+            out0 |= cnt0 << j;
+            out1 |= cnt1 << j;
+        }
+
+        *(uint64_t *)(vd + i) = out0;
+        *(uint64_t *)(vd + i + 8) = out1;
+    }
+}
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 484d4218b5f..13f84d14d3e 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -7504,6 +7504,25 @@ static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                  \
 DO_SVE2_PPZZ_MATCH(MATCH, match)
 DO_SVE2_PPZZ_MATCH(NMATCH, nmatch)
 
+static bool trans_HISTCNT(DisasContext *s, arg_rprr_esz *a)
+{
+    static gen_helper_gvec_4 * const fns[2] = {
+        gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
+    };
+    if (a->esz < 2) {
+        return false;
+    }
+    return do_sve2_zpzz_ool(s, a, fns[a->esz - 2]);
+}
+
+static bool trans_HISTSEG(DisasContext *s, arg_rrr_esz *a)
+{
+    if (a->esz != 0) {
+        return false;
+    }
+    return do_sve2_zzz_ool(s, a, gen_helper_sve2_histseg);
+}
+
 static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
                             gen_helper_gvec_4_ptr *fn)
 {
-- 
2.20.1



  parent reply	other threads:[~2021-05-25 15:34 UTC|newest]

Thread overview: 96+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-25 15:01 [PULL 000/114] target-arm queue Peter Maydell
2021-05-25 15:01 ` [PULL 001/114] hw/arm/smmuv3: Another range invalidation fix Peter Maydell
2021-05-25 15:01 ` [PULL 002/114] hw/intc/arm_gicv3_cpuif: Fix EOIR write access check logic Peter Maydell
2021-05-25 15:01 ` [PULL 003/114] hw/arm/mps2-tz: Don't duplicate modelling of SRAM in AN524 Peter Maydell
2021-05-25 15:01 ` [PULL 004/114] hw/arm/mps2-tz: Make SRAM_ADDR_WIDTH board-specific Peter Maydell
2021-05-25 15:01 ` [PULL 005/114] hw/arm/armsse.c: Correct modelling of SSE-300 internal SRAMs Peter Maydell
2021-05-25 15:01 ` [PULL 006/114] hw/arm/armsse: Convert armsse_realize() to use ERRP_GUARD Peter Maydell
2021-05-25 15:01 ` [PULL 007/114] hw/arm/mps2-tz: Allow board to specify a boot RAM size Peter Maydell
2021-05-25 15:01 ` [PULL 008/114] hw/arm: Model TCMs in the SSE-300, not the AN547 Peter Maydell
2021-05-25 15:01 ` [PULL 009/114] target/arm: Use correct SP in M-profile exception return Peter Maydell
2021-05-25 15:01 ` [PULL 010/114] accel/tcg: Replace g_new() + memcpy() by g_memdup() Peter Maydell
2021-05-25 15:01 ` [PULL 011/114] accel/tcg: Pass length argument to tlb_flush_range_locked() Peter Maydell
2021-05-25 15:01 ` [PULL 012/114] accel/tlb: Rename TLBFlushPageBitsByMMUIdxData -> TLBFlushRangeData Peter Maydell
2021-05-25 15:01 ` [PULL 013/114] accel/tcg: Remove {encode,decode}_pbm_to_runon Peter Maydell
2021-05-25 15:01 ` [PULL 014/114] accel/tcg: Add tlb_flush_range_by_mmuidx() Peter Maydell
2021-05-25 15:01 ` [PULL 015/114] accel/tcg: Add tlb_flush_range_by_mmuidx_all_cpus() Peter Maydell
2021-05-25 15:01 ` [PULL 016/114] accel/tlb: Add tlb_flush_range_by_mmuidx_all_cpus_synced() Peter Maydell
2021-05-25 15:01 ` [PULL 017/114] accel/tcg: Rename tlb_flush_page_bits -> range]_by_mmuidx_async_0 Peter Maydell
2021-05-25 15:01 ` [PULL 018/114] accel/tlb: Rename tlb_flush_[page_bits > range]_by_mmuidx_async_[2 > 1] Peter Maydell
2021-05-25 15:01 ` [PULL 019/114] target/arm: Add support for FEAT_TLBIRANGE Peter Maydell
2021-05-25 15:01 ` [PULL 020/114] target/arm: Add support for FEAT_TLBIOS Peter Maydell
2021-05-25 15:01 ` [PULL 021/114] target/arm: set ID_AA64ISAR0.TLB to 2 for max AARCH64 CPU type Peter Maydell
2021-05-25 15:01 ` [PULL 022/114] disas/libvixl: Protect C system header for C++ compiler Peter Maydell
2021-05-25 15:01 ` [PULL 023/114] target/arm: Add ID_AA64ZFR0 fields and isar_feature_aa64_sve2 Peter Maydell
2021-05-25 15:01 ` [PULL 024/114] target/arm: Implement SVE2 Integer Multiply - Unpredicated Peter Maydell
2021-05-25 15:01 ` [PULL 025/114] target/arm: Implement SVE2 integer pairwise add and accumulate long Peter Maydell
2021-05-25 15:01 ` [PULL 026/114] target/arm: Implement SVE2 integer unary operations (predicated) Peter Maydell
2021-05-25 15:01 ` [PULL 027/114] target/arm: Split out saturating/rounding shifts from neon Peter Maydell
2021-05-25 15:01 ` [PULL 028/114] target/arm: Implement SVE2 saturating/rounding bitwise shift left (predicated) Peter Maydell
2021-05-25 15:01 ` [PULL 029/114] target/arm: Implement SVE2 integer halving add/subtract (predicated) Peter Maydell
2021-05-25 15:02 ` [PULL 030/114] target/arm: Implement SVE2 integer pairwise arithmetic Peter Maydell
2021-05-25 15:02 ` [PULL 031/114] target/arm: Implement SVE2 saturating add/subtract (predicated) Peter Maydell
2021-05-25 15:02 ` [PULL 032/114] target/arm: Implement SVE2 integer add/subtract long Peter Maydell
2021-05-25 15:02 ` [PULL 033/114] target/arm: Implement SVE2 integer add/subtract interleaved long Peter Maydell
2021-05-25 15:02 ` [PULL 034/114] target/arm: Implement SVE2 integer add/subtract wide Peter Maydell
2021-05-25 15:02 ` [PULL 035/114] target/arm: Implement SVE2 integer multiply long Peter Maydell
2021-05-25 15:02 ` [PULL 036/114] target/arm: Implement SVE2 PMULLB, PMULLT Peter Maydell
2021-05-25 15:02 ` [PULL 037/114] target/arm: Implement SVE2 bitwise shift left long Peter Maydell
2021-05-25 15:02 ` [PULL 038/114] target/arm: Implement SVE2 bitwise exclusive-or interleaved Peter Maydell
2021-05-25 15:02 ` [PULL 039/114] target/arm: Implement SVE2 bitwise permute Peter Maydell
2021-05-25 15:02 ` [PULL 040/114] target/arm: Implement SVE2 complex integer add Peter Maydell
2021-05-25 15:02 ` [PULL 041/114] target/arm: Implement SVE2 integer absolute difference and accumulate long Peter Maydell
2021-05-25 15:02 ` [PULL 042/114] target/arm: Implement SVE2 integer add/subtract long with carry Peter Maydell
2021-05-25 15:02 ` [PULL 043/114] target/arm: Implement SVE2 bitwise shift right and accumulate Peter Maydell
2021-05-25 15:02 ` [PULL 044/114] target/arm: Implement SVE2 bitwise shift and insert Peter Maydell
2021-05-25 15:02 ` [PULL 045/114] target/arm: Implement SVE2 integer absolute difference and accumulate Peter Maydell
2021-05-25 15:02 ` [PULL 046/114] target/arm: Implement SVE2 saturating extract narrow Peter Maydell
2021-05-25 15:02 ` [PULL 047/114] target/arm: Implement SVE2 floating-point pairwise Peter Maydell
2021-05-25 15:02 ` [PULL 048/114] target/arm: Implement SVE2 SHRN, RSHRN Peter Maydell
2021-05-25 15:02 ` [PULL 049/114] target/arm: Implement SVE2 SQSHRUN, SQRSHRUN Peter Maydell
2021-05-25 15:02 ` [PULL 050/114] target/arm: Implement SVE2 UQSHRN, UQRSHRN Peter Maydell
2021-05-25 15:02 ` [PULL 051/114] target/arm: Implement SVE2 SQSHRN, SQRSHRN Peter Maydell
2021-05-25 15:02 ` [PULL 052/114] target/arm: Implement SVE2 WHILEGT, WHILEGE, WHILEHI, WHILEHS Peter Maydell
2021-05-25 15:02 ` [PULL 053/114] target/arm: Implement SVE2 WHILERW, WHILEWR Peter Maydell
2021-05-25 15:02 ` [PULL 054/114] target/arm: Implement SVE2 bitwise ternary operations Peter Maydell
2021-05-25 15:02 ` [PULL 055/114] target/arm: Implement SVE2 MATCH, NMATCH Peter Maydell
2021-05-25 15:02 ` [PULL 056/114] target/arm: Implement SVE2 saturating multiply-add long Peter Maydell
2021-05-25 15:02 ` [PULL 057/114] target/arm: Implement SVE2 saturating multiply-add high Peter Maydell
2021-05-25 15:02 ` [PULL 058/114] target/arm: Implement SVE2 integer multiply-add long Peter Maydell
2021-05-25 15:02 ` [PULL 059/114] target/arm: Implement SVE2 complex integer multiply-add Peter Maydell
2021-05-25 15:02 ` [PULL 060/114] target/arm: Implement SVE2 ADDHNB, ADDHNT Peter Maydell
2021-05-25 15:02 ` [PULL 061/114] target/arm: Implement SVE2 RADDHNB, RADDHNT Peter Maydell
2021-05-25 15:02 ` [PULL 062/114] target/arm: Implement SVE2 SUBHNB, SUBHNT Peter Maydell
2021-05-25 15:02 ` [PULL 063/114] target/arm: Implement SVE2 RSUBHNB, RSUBHNT Peter Maydell
2021-05-25 15:02 ` Peter Maydell [this message]
2021-05-25 15:02 ` [PULL 065/114] target/arm: Implement SVE2 XAR Peter Maydell
2021-05-25 15:02 ` [PULL 066/114] target/arm: Implement SVE2 scatter store insns Peter Maydell
2021-05-25 15:02 ` [PULL 067/114] target/arm: Implement SVE2 gather load insns Peter Maydell
2021-05-25 15:02 ` [PULL 068/114] target/arm: Implement SVE2 FMMLA Peter Maydell
2021-05-25 15:02 ` [PULL 069/114] target/arm: Implement SVE2 SPLICE, EXT Peter Maydell
2021-05-25 15:02 ` [PULL 070/114] target/arm: Use correct output type for gvec_sdot_*_b Peter Maydell
2021-05-25 15:02 ` [PULL 071/114] target/arm: Pass separate addend to {U, S}DOT helpers Peter Maydell
2021-05-25 15:02 ` [PULL 072/114] target/arm: Pass separate addend to FCMLA helpers Peter Maydell
2021-05-25 15:02 ` [PULL 073/114] target/arm: Split out formats for 2 vectors + 1 index Peter Maydell
2021-05-25 15:02 ` [PULL 074/114] target/arm: Split out formats for 3 " Peter Maydell
2021-05-25 15:02 ` [PULL 075/114] target/arm: Implement SVE2 integer multiply (indexed) Peter Maydell
2021-05-25 15:02 ` [PULL 076/114] target/arm: Implement SVE2 integer multiply-add (indexed) Peter Maydell
2021-05-25 15:02 ` [PULL 077/114] target/arm: Implement SVE2 saturating multiply-add high (indexed) Peter Maydell
2021-05-25 15:02 ` [PULL 078/114] target/arm: Implement SVE2 saturating multiply-add (indexed) Peter Maydell
2021-05-25 15:02 ` [PULL 079/114] target/arm: Implement SVE2 saturating multiply (indexed) Peter Maydell
2021-05-25 15:02 ` [PULL 080/114] target/arm: Implement SVE2 signed saturating doubling multiply high Peter Maydell
2021-05-25 15:02 ` [PULL 081/114] target/arm: Implement SVE2 saturating multiply high (indexed) Peter Maydell
2021-05-25 15:02 ` [PULL 082/114] target/arm: Implement SVE2 multiply-add long (indexed) Peter Maydell
2021-05-25 15:02 ` [PULL 083/114] target/arm: Implement SVE2 integer multiply " Peter Maydell
2021-05-25 15:02 ` [PULL 084/114] target/arm: Implement SVE2 complex integer multiply-add (indexed) Peter Maydell
2021-05-25 15:02 ` [PULL 085/114] target/arm: Implement SVE2 complex integer dot product Peter Maydell
2021-05-25 15:02 ` [PULL 086/114] target/arm: Macroize helper_gvec_{s,u}dot_{b,h} Peter Maydell
2021-05-25 15:02 ` [PULL 087/114] target/arm: Macroize helper_gvec_{s,u}dot_idx_{b,h} Peter Maydell
2021-05-25 15:02 ` [PULL 088/114] target/arm: Implement SVE mixed sign dot product (indexed) Peter Maydell
2021-05-25 15:02 ` [PULL 089/114] target/arm: Implement SVE mixed sign dot product Peter Maydell
2021-05-25 15:03 ` [PULL 090/114] target/arm: Implement SVE2 crypto unary operations Peter Maydell
2021-05-25 15:03 ` [PULL 091/114] target/arm: Implement SVE2 crypto destructive binary operations Peter Maydell
2021-05-25 15:03 ` [PULL 092/114] target/arm: Implement SVE2 crypto constructive " Peter Maydell
2021-05-25 15:03 ` [PULL 093/114] target/arm: Implement SVE2 TBL, TBX Peter Maydell
2021-05-25 15:03 ` [PULL 094/114] target/arm: Implement SVE2 FCVTNT Peter Maydell
2021-05-25 16:30 ` [PULL 000/114] target-arm queue Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210525150324.32370-65-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.