All of lore.kernel.org
 help / color / mirror / Atom feed
From: Cornelia Huck <cohuck@redhat.com>
To: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-s390x@nongnu.org,
	Richard Henderson <richard.henderson@linaro.org>,
	qemu-devel@nongnu.org, David Hildenbrand <david@redhat.com>
Subject: [Qemu-devel] [PULL v2 02/34] s390x/tcg: Implement VECTOR FIND ANY ELEMENT EQUAL
Date: Fri,  7 Jun 2019 16:16:55 +0200	[thread overview]
Message-ID: <20190607141727.29018-3-cohuck@redhat.com> (raw)
In-Reply-To: <20190607141727.29018-1-cohuck@redhat.com>

From: David Hildenbrand <david@redhat.com>

Complicated stuff. Provide two different helpers for CC an !CC handling.
We might want to add more helpers later.

zero_search() and match_index() are courtesy of Richard H.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/Makefile.objs       |   2 +-
 target/s390x/helper.h            |   8 ++
 target/s390x/insn-data.def       |   5 +
 target/s390x/translate_vx.inc.c  |  30 ++++++
 target/s390x/vec_string_helper.c | 154 +++++++++++++++++++++++++++++++
 5 files changed, 198 insertions(+), 1 deletion(-)
 create mode 100644 target/s390x/vec_string_helper.c

diff --git a/target/s390x/Makefile.objs b/target/s390x/Makefile.objs
index 0316457880d8..ffdd484ef05e 100644
--- a/target/s390x/Makefile.objs
+++ b/target/s390x/Makefile.objs
@@ -1,7 +1,7 @@
 obj-y += cpu.o cpu_models.o cpu_features.o gdbstub.o interrupt.o helper.o
 obj-$(CONFIG_TCG) += translate.o cc_helper.o excp_helper.o fpu_helper.o
 obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o crypto_helper.o
-obj-$(CONFIG_TCG) += vec_helper.o vec_int_helper.o
+obj-$(CONFIG_TCG) += vec_helper.o vec_int_helper.o vec_string_helper.o
 obj-$(CONFIG_SOFTMMU) += machine.o ioinst.o arch_dump.o mmu_helper.o diag.o
 obj-$(CONFIG_SOFTMMU) += sigp.o
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 7755a96c3371..c45328cf73c1 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -211,6 +211,14 @@ DEF_HELPER_FLAGS_4(gvec_vscbi8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vscbi16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_4(gvec_vtm, void, ptr, cptr, env, i32)
 
+/* === Vector String Instructions === */
+DEF_HELPER_FLAGS_4(gvec_vfae8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfae16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfae32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_5(gvec_vfae_cc8, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfae_cc16, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfae_cc32, void, ptr, cptr, cptr, env, i32)
+
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
 DEF_HELPER_4(diag, void, env, i32, i32, i32)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index e61475bdc483..070ce2a471e0 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1191,6 +1191,11 @@
 /* VECTOR TEST UNDER MASK */
     F(0xe7d8, VTM,     VRR_a, V,   0, 0, 0, 0, vtm, 0, IF_VEC)
 
+/* === Vector String Instructions === */
+
+/* VECTOR FIND ANY ELEMENT EQUAL */
+    F(0xe782, VFAE,    VRR_b, V,   0, 0, 0, 0, vfae, 0, IF_VEC)
+
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
     E(0xb250, CSP,     RRE,   Z,   r1_32u, ra2, r1_P, 0, csp, 0, MO_TEUL, IF_PRIV)
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 7e0bfcb1907c..ebd7a877f17d 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2353,3 +2353,33 @@ static DisasJumpType op_vtm(DisasContext *s, DisasOps *o)
     set_cc_static(s);
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vfae(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s->fields, m4);
+    const uint8_t m5 = get_field(s->fields, m5);
+    static gen_helper_gvec_3 * const g[3] = {
+        gen_helper_gvec_vfae8,
+        gen_helper_gvec_vfae16,
+        gen_helper_gvec_vfae32,
+    };
+    static gen_helper_gvec_3_ptr * const g_cc[3] = {
+        gen_helper_gvec_vfae_cc8,
+        gen_helper_gvec_vfae_cc16,
+        gen_helper_gvec_vfae_cc32,
+    };
+    if (es > ES_32) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m5, 0, 1)) {
+        gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                       get_field(s->fields, v3), cpu_env, m5, g_cc[es]);
+        set_cc_static(s);
+    } else {
+        gen_gvec_3_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+                       get_field(s->fields, v3), m5, g[es]);
+    }
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec_string_helper.c b/target/s390x/vec_string_helper.c
new file mode 100644
index 000000000000..56dc89c824de
--- /dev/null
+++ b/target/s390x/vec_string_helper.c
@@ -0,0 +1,154 @@
+/*
+ * QEMU TCG support -- s390x vector string instruction support
+ *
+ * Copyright (C) 2019 Red Hat Inc
+ *
+ * Authors:
+ *   David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "cpu.h"
+#include "internal.h"
+#include "vec.h"
+#include "tcg/tcg.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "exec/helper-proto.h"
+
+/*
+ * Returns a bit set in the MSB of each element that is zero,
+ * as defined by the mask.
+ */
+static inline uint64_t zero_search(uint64_t a, uint64_t mask)
+{
+    return ~(((a & mask) + mask) | a | mask);
+}
+
+/*
+ * Returns the byte offset for the first match, or 16 for no match.
+ */
+static inline int match_index(uint64_t c0, uint64_t c1)
+{
+    return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3;
+}
+
+/*
+ * Returns the number of bits composing one element.
+ */
+static uint8_t get_element_bits(uint8_t es)
+{
+    return (1 << es) * BITS_PER_BYTE;
+}
+
+/*
+ * Returns the bitmask for a single element.
+ */
+static uint64_t get_single_element_mask(uint8_t es)
+{
+    return -1ull >> (64 - get_element_bits(es));
+}
+
+/*
+ * Returns the bitmask for a single element (excluding the MSB).
+ */
+static uint64_t get_single_element_lsbs_mask(uint8_t es)
+{
+    return -1ull >> (65 - get_element_bits(es));
+}
+
+/*
+ * Returns the bitmasks for multiple elements (excluding the MSBs).
+ */
+static uint64_t get_element_lsbs_mask(uint8_t es)
+{
+    return dup_const(es, get_single_element_lsbs_mask(es));
+}
+
+static int vfae(void *v1, const void *v2, const void *v3, bool in,
+                bool rt, bool zs, uint8_t es)
+{
+    const uint64_t mask = get_element_lsbs_mask(es);
+    const int bits = get_element_bits(es);
+    uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1;
+    uint64_t first_zero = 16;
+    uint64_t first_equal;
+    int i;
+
+    a0 = s390_vec_read_element64(v2, 0);
+    a1 = s390_vec_read_element64(v2, 1);
+    b0 = s390_vec_read_element64(v3, 0);
+    b1 = s390_vec_read_element64(v3, 1);
+    e0 = 0;
+    e1 = 0;
+    /* compare against equality with every other element */
+    for (i = 0; i < 64; i += bits) {
+        t0 = rol64(b0, i);
+        t1 = rol64(b1, i);
+        e0 |= zero_search(a0 ^ t0, mask);
+        e0 |= zero_search(a0 ^ t1, mask);
+        e1 |= zero_search(a1 ^ t0, mask);
+        e1 |= zero_search(a1 ^ t1, mask);
+    }
+    /* invert the result if requested - invert only the MSBs */
+    if (in) {
+        e0 = ~e0 & ~mask;
+        e1 = ~e1 & ~mask;
+    }
+    first_equal = match_index(e0, e1);
+
+    if (zs) {
+        z0 = zero_search(a0, mask);
+        z1 = zero_search(a1, mask);
+        first_zero = match_index(z0, z1);
+    }
+
+    if (rt) {
+        e0 = (e0 >> (bits - 1)) * get_single_element_mask(es);
+        e1 = (e1 >> (bits - 1)) * get_single_element_mask(es);
+        s390_vec_write_element64(v1, 0, e0);
+        s390_vec_write_element64(v1, 1, e1);
+    } else {
+        s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
+        s390_vec_write_element64(v1, 1, 0);
+    }
+
+    if (first_zero == 16 && first_equal == 16) {
+        return 3; /* no match */
+    } else if (first_zero == 16) {
+        return 1; /* matching elements, no match for zero */
+    } else if (first_equal < first_zero) {
+        return 2; /* matching elements before match for zero */
+    }
+    return 0; /* match for zero */
+}
+
+#define DEF_VFAE_HELPER(BITS)                                                  \
+void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3,         \
+                             uint32_t desc)                                    \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1);                          \
+    const bool rt = extract32(simd_data(desc), 2, 1);                          \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                                   \
+}
+DEF_VFAE_HELPER(8)
+DEF_VFAE_HELPER(16)
+DEF_VFAE_HELPER(32)
+
+#define DEF_VFAE_CC_HELPER(BITS)                                               \
+void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3,      \
+                                CPUS390XState *env, uint32_t desc)             \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1);                          \
+    const bool rt = extract32(simd_data(desc), 2, 1);                          \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                      \
+}
+DEF_VFAE_CC_HELPER(8)
+DEF_VFAE_CC_HELPER(16)
+DEF_VFAE_CC_HELPER(32)
-- 
2.20.1



  parent reply	other threads:[~2019-06-07 16:09 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-07 14:16 [Qemu-devel] [PULL v2 00/34] s390x updates Cornelia Huck
2019-06-07 14:16 ` [Qemu-devel] [PULL v2 01/34] MAINTAINERS: cover tests/migration/s390x/ Cornelia Huck
2019-06-07 14:16 ` Cornelia Huck [this message]
2019-06-07 14:16 ` [Qemu-devel] [PULL v2 03/34] s390x/tcg: Implement VECTOR FIND ELEMENT EQUAL Cornelia Huck
2019-06-07 14:16 ` [Qemu-devel] [PULL v2 04/34] s390x/tcg: Implement VECTOR FIND ELEMENT NOT EQUAL Cornelia Huck
2019-06-07 14:16 ` [Qemu-devel] [PULL v2 05/34] s390x/tcg: Implement VECTOR ISOLATE STRING Cornelia Huck
2019-06-07 14:16 ` [Qemu-devel] [PULL v2 06/34] s390x/tcg: Implement VECTOR STRING RANGE COMPARE Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 07/34] s390x: Align vector registers to 16 bytes Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 08/34] s390x: Use uint64_t for vector registers Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 09/34] s390x/tcg: Fix max_byte detection for stfle Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 10/34] s390x/tcg: Store only the necessary amount of doublewords for STFLE Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 11/34] s390x/tcg: Introduce tcg_s390_vector_exception() Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 12/34] s390x/tcg: Export float_comp_to_cc() and float(32|64|128)_dcmask() Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 13/34] s390x/tcg: Implement VECTOR FP ADD Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 14/34] s390x/tcg: Implement VECTOR FP COMPARE (AND SIGNAL) SCALAR Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 15/34] s390x/tcg: Implement VECTOR FP COMPARE (EQUAL|HIGH|HIGH OR EQUAL) Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 16/34] s390x/tcg: Implement VECTOR FP CONVERT FROM FIXED 64-BIT Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 17/34] s390x/tcg: Implement VECTOR FP CONVERT FROM LOGICAL 64-BIT Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 18/34] s390x/tcg: Implement VECTOR FP CONVERT TO FIXED 64-BIT Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 19/34] s390x/tcg: Implement VECTOR FP CONVERT TO LOGICAL 64-BIT Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 20/34] s390x/tcg: Implement VECTOR FP DIVIDE Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 21/34] s390x/tcg: Implement VECTOR LOAD FP INTEGER Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 22/34] s390x/tcg: Implement VECTOR LOAD LENGTHENED Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 23/34] s390x/tcg: Implement VECTOR LOAD ROUNDED Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 24/34] s390x/tcg: Implement VECTOR FP MULTIPLY Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 25/34] s390x/tcg: Implement VECTOR FP MULTIPLY AND (ADD|SUBTRACT) Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 26/34] s390x/tcg: Implement VECTOR FP PERFORM SIGN OPERATION Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 27/34] s390x/tcg: Implement VECTOR FP SQUARE ROOT Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 28/34] s390x/tcg: Implement VECTOR FP SUBTRACT Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 29/34] s390x/tcg: Implement VECTOR FP TEST DATA CLASS IMMEDIATE Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 30/34] s390x/tcg: Allow linux-user to use vector instructions Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 31/34] s390x/tcg: We support the Vector Facility Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 32/34] s390x: Bump the "qemu" CPU model up to a stripped-down z13 Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 33/34] s390x/tcg: Use tcg_gen_gvec_bitsel for VECTOR SELECT Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 34/34] linux-user: elf: ELF_HWCAP for s390x Cornelia Huck
2019-06-07 15:14 ` [Qemu-devel] [PULL v2 00/34] s390x updates Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190607141727.29018-3-cohuck@redhat.com \
    --to=cohuck@redhat.com \
    --cc=david@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-s390x@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.