All of lore.kernel.org
 help / color / mirror / Atom feed
From: Cornelia Huck <cohuck@redhat.com>
To: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-s390x@nongnu.org,
	Richard Henderson <richard.henderson@linaro.org>,
	qemu-devel@nongnu.org, David Hildenbrand <david@redhat.com>
Subject: [Qemu-devel] [PULL 07/35] s390x/tcg: Implement VECTOR STRING RANGE COMPARE
Date: Fri,  7 Jun 2019 11:52:09 +0200	[thread overview]
Message-ID: <20190607095237.11364-8-cohuck@redhat.com> (raw)
In-Reply-To: <20190607095237.11364-1-cohuck@redhat.com>

From: David Hildenbrand <david@redhat.com>

Unfortunately, there is no easy way to avoid looping over all elements
in v2. Provide specialized helper variants for all four combinations of
cc and rt (!cc/!rt, !cc/rt, cc/!rt, cc/rt) and for all element types.
Having separate variants for the different values of rt, in particular,
might allow the compiler to optimize the code a lot.

Add s390_vec_write_element().

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h            |  12 +++
 target/s390x/insn-data.def       |   2 +
 target/s390x/translate_vx.inc.c  |  59 +++++++++++++
 target/s390x/vec.h               |  21 +++++
 target/s390x/vec_string_helper.c | 143 +++++++++++++++++++++++++++++++
 5 files changed, 237 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 1f9f0b463bdb..5db67779d3c6 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -236,6 +236,18 @@ DEF_HELPER_FLAGS_3(gvec_vistr32, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
 DEF_HELPER_4(gvec_vistr_cc8, void, ptr, cptr, env, i32)
 DEF_HELPER_4(gvec_vistr_cc16, void, ptr, cptr, env, i32)
 DEF_HELPER_4(gvec_vistr_cc32, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc_rt8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc_rt16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc_rt32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_6(gvec_vstrc_cc8, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc16, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc32, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index b4a6b5960864..a2969fab5884 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1201,6 +1201,8 @@
     F(0xe781, VFENE,   VRR_b, V,   0, 0, 0, 0, vfene, 0, IF_VEC)
 /* VECTOR ISOLATE STRING */
     F(0xe75c, VISTR,   VRR_a, V,   0, 0, 0, 0, vistr, 0, IF_VEC)
+/* VECTOR STRING RANGE COMPARE */
+    F(0xe78a, VSTRC,   VRR_d, V,   0, 0, 0, 0, vstrc, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 08a62eab5263..f26ffa28957a 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -217,6 +217,10 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
     tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                        vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                        16, 16, data, fn)
+#define gen_gvec_4_ptr(v1, v2, v3, v4, ptr, data, fn) \
+    tcg_gen_gvec_4_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
+                       vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
+                       ptr, 16, 16, data, fn)
 #define gen_gvec_dup_i64(es, v1, c) \
     tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c)
 #define gen_gvec_mov(v1, v2) \
@@ -2479,3 +2483,58 @@ static DisasJumpType op_vistr(DisasContext *s, DisasOps *o)
     }
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s->fields, m5); /* element size from m5 */
+    const uint8_t m6 = get_field(s->fields, m6); /* mode flags from m6 */
+    static gen_helper_gvec_4 * const g[3] = { /* no CC, no rt */
+        gen_helper_gvec_vstrc8,
+        gen_helper_gvec_vstrc16,
+        gen_helper_gvec_vstrc32,
+    };
+    static gen_helper_gvec_4 * const g_rt[3] = { /* no CC, rt */
+        gen_helper_gvec_vstrc_rt8,
+        gen_helper_gvec_vstrc_rt16,
+        gen_helper_gvec_vstrc_rt32,
+    };
+    static gen_helper_gvec_4_ptr * const g_cc[3] = { /* CC, no rt */
+        gen_helper_gvec_vstrc_cc8,
+        gen_helper_gvec_vstrc_cc16,
+        gen_helper_gvec_vstrc_cc32,
+    };
+    static gen_helper_gvec_4_ptr * const g_cc_rt[3] = { /* CC and rt */
+        gen_helper_gvec_vstrc_cc_rt8,
+        gen_helper_gvec_vstrc_cc_rt16,
+        gen_helper_gvec_vstrc_cc_rt32,
+    };
+
+    if (es > ES_32) { /* the tables only cover 8/16/32-bit elements */
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m6, 0, 1)) { /* m6 bit 0: helper also computes the CC */
+        if (extract32(m6, 2, 1)) { /* m6 bit 2: use the rt variant */
+            gen_gvec_4_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                           get_field(s->fields, v3), get_field(s->fields, v4),
+                           cpu_env, m6, g_cc_rt[es]);
+        } else {
+            gen_gvec_4_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                           get_field(s->fields, v3), get_field(s->fields, v4),
+                           cpu_env, m6, g_cc[es]);
+        }
+        set_cc_static(s); /* the helper stored the CC in env->cc_op */
+    } else {
+        if (extract32(m6, 2, 1)) { /* m6 bit 2: use the rt variant */
+            gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+                           get_field(s->fields, v3), get_field(s->fields, v4),
+                           m6, g_rt[es]);
+        } else {
+            gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+                           get_field(s->fields, v3), get_field(s->fields, v4),
+                           m6, g[es]);
+        }
+    }
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec.h b/target/s390x/vec.h
index affc62874cae..a6e361869b2e 100644
--- a/target/s390x/vec.h
+++ b/target/s390x/vec.h
@@ -117,4 +117,25 @@ static inline void s390_vec_write_element64(S390Vector *v, uint8_t enr,
     v->doubleword[enr] = data;
 }
 
+static inline void s390_vec_write_element(S390Vector *v, uint8_t enr,
+                                          uint8_t es, uint64_t data)
+{
+    switch (es) { /* forward to the writer for this element size */
+    case MO_8:
+        s390_vec_write_element8(v, enr, data);
+        break;
+    case MO_16:
+        s390_vec_write_element16(v, enr, data);
+        break;
+    case MO_32:
+        s390_vec_write_element32(v, enr, data);
+        break;
+    case MO_64:
+        s390_vec_write_element64(v, enr, data);
+        break;
+    default:
+        g_assert_not_reached(); /* es is none of MO_8 .. MO_64 */
+    }
+}
+
 #endif /* S390X_VEC_H */
diff --git a/target/s390x/vec_string_helper.c b/target/s390x/vec_string_helper.c
index 6bafa23bd723..c516c0ceeb4c 100644
--- a/target/s390x/vec_string_helper.c
+++ b/target/s390x/vec_string_helper.c
@@ -328,3 +328,146 @@ void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \
 DEF_VISTR_CC_HELPER(8)
 DEF_VISTR_CC_HELPER(16)
 DEF_VISTR_CC_HELPER(32)
+
+static bool element_compare(uint32_t data, uint32_t l, uint8_t c)
+{
+    const bool equal = extract32(c, 7, 1);  /* result if data == l */
+    const bool lower = extract32(c, 6, 1);  /* result if data < l */
+    const bool higher = extract32(c, 5, 1); /* result if data > l */
+
+    if (data < l) {
+        return lower;
+    } else if (data > l) {
+        return higher;
+    }
+    return equal;
+}
+
+static int vstrc(void *v1, const void *v2, const void *v3, const void *v4,
+                 bool in, bool rt, bool zs, uint8_t es)
+{
+    const uint64_t mask = get_element_lsbs_mask(es);
+    uint64_t a0 = s390_vec_read_element64(v2, 0);
+    uint64_t a1 = s390_vec_read_element64(v2, 1);
+    int first_zero = 16, first_match = 16; /* byte indices; 16 == none */
+    S390Vector rt_result = {}; /* zeroed; stored to v1 only if rt */
+    uint64_t z0, z1;
+    int i, j;
+
+    if (zs) { /* zero search on v2; first_zero is a byte index */
+        z0 = zero_search(a0, mask);
+        z1 = zero_search(a1, mask);
+        first_zero = match_index(z0, z1);
+    }
+
+    for (i = 0; i < 16 / (1 << es); i++) { /* every element of v2 */
+        const uint32_t data = s390_vec_read_element(v2, i, es);
+        const int cur_byte = i * (1 << es); /* byte offset of element i */
+        bool any_match = false;
+
+        /* without rt, nothing past the first zero matters: stop here */
+        if (cur_byte == first_zero && !rt) {
+            break;
+        }
+
+        for (j = 0; j < 16 / (1 << es); j += 2) { /* v3/v4 element pairs */
+            const uint32_t l1 = s390_vec_read_element(v3, j, es);
+            const uint32_t l2 = s390_vec_read_element(v3, j + 1, es);
+            /* the control bits live in the highest byte of each v4 element */
+            const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es));
+            const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es));
+
+            if (element_compare(data, l1, c1) &&
+                element_compare(data, l2, c2)) {
+                any_match = true;
+                break;
+            }
+        }
+        /* invert the result if requested */
+        any_match = in ^ any_match;
+
+        if (any_match) {
+            /* with rt, set the element to all ones and keep scanning */
+            if (rt) {
+                const uint64_t val = -1ull;
+
+                first_match = MIN(cur_byte, first_match);
+                s390_vec_write_element(&rt_result, i, es, val);
+            } else {
+                /* stop on the first match */
+                first_match = cur_byte;
+                break;
+            }
+        }
+    }
+
+    if (rt) { /* v1 = mask of all matching elements */
+        *(S390Vector *)v1 = rt_result;
+    } else { /* v1 = byte index in doubleword 0, rest zeroed */
+        s390_vec_write_element64(v1, 0, MIN(first_match, first_zero));
+        s390_vec_write_element64(v1, 1, 0);
+    }
+
+    if (first_zero == 16 && first_match == 16) {
+        return 3; /* no match */
+    } else if (first_zero == 16) {
+        return 1; /* matching elements, no match for zero */
+    } else if (first_match < first_zero) {
+        return 2; /* matching elements before match for zero */
+    }
+    return 0; /* match for zero */
+}
+
+#define DEF_VSTRC_HELPER(BITS) /* no CC, no rt; BITS-bit elements */           \
+void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3,        \
+                              const void *v4, uint32_t desc)                   \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1); /* invert the match */   \
+    const bool zs = extract32(simd_data(desc), 1, 1); /* search for zero */    \
+                                                                               \
+    vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS);                               \
+}
+DEF_VSTRC_HELPER(8)
+DEF_VSTRC_HELPER(16)
+DEF_VSTRC_HELPER(32)
+
+#define DEF_VSTRC_RT_HELPER(BITS) /* rt, no CC; BITS-bit elements */           \
+void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3,     \
+                                 const void *v4, uint32_t desc)                \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1); /* invert the match */   \
+    const bool zs = extract32(simd_data(desc), 1, 1); /* search for zero */    \
+                                                                               \
+    vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS);                               \
+}
+DEF_VSTRC_RT_HELPER(8)
+DEF_VSTRC_RT_HELPER(16)
+DEF_VSTRC_RT_HELPER(32)
+
+#define DEF_VSTRC_CC_HELPER(BITS) /* CC, no rt; BITS-bit elements */           \
+void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3,     \
+                                 const void *v4, CPUS390XState *env,           \
+                                 uint32_t desc)                                \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1); /* invert the match */   \
+    const bool zs = extract32(simd_data(desc), 1, 1); /* search for zero */    \
+                                                                               \
+    env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS);                  \
+}
+DEF_VSTRC_CC_HELPER(8)
+DEF_VSTRC_CC_HELPER(16)
+DEF_VSTRC_CC_HELPER(32)
+
+#define DEF_VSTRC_CC_RT_HELPER(BITS) /* CC and rt; BITS-bit elements */        \
+void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3,  \
+                                    const void *v4, CPUS390XState *env,        \
+                                    uint32_t desc)                             \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1); /* invert the match */   \
+    const bool zs = extract32(simd_data(desc), 1, 1); /* search for zero */    \
+                                                                               \
+    env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS);                  \
+}
+DEF_VSTRC_CC_RT_HELPER(8)
+DEF_VSTRC_CC_RT_HELPER(16)
+DEF_VSTRC_CC_RT_HELPER(32)
-- 
2.20.1



  parent reply	other threads:[~2019-06-07 10:52 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-07  9:52 [Qemu-devel] [PULL 00/35] s390x updates Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 01/35] MAINTAINERS: cover tests/migration/s390x/ Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 02/35] vfio-ccw: support async command subregion Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 03/35] s390x/tcg: Implement VECTOR FIND ANY ELEMENT EQUAL Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 04/35] s390x/tcg: Implement VECTOR FIND " Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 05/35] s390x/tcg: Implement VECTOR FIND ELEMENT NOT EQUAL Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 06/35] s390x/tcg: Implement VECTOR ISOLATE STRING Cornelia Huck
2019-06-07  9:52 ` Cornelia Huck [this message]
2019-06-07  9:52 ` [Qemu-devel] [PULL 08/35] s390x: Align vector registers to 16 bytes Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 09/35] s390x: Use uint64_t for vector registers Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 10/35] s390x/tcg: Fix max_byte detection for stfle Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 11/35] s390x/tcg: Store only the necessary amount of doublewords for STFLE Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 12/35] s390x/tcg: Introduce tcg_s390_vector_exception() Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 13/35] s390x/tcg: Export float_comp_to_cc() and float(32|64|128)_dcmask() Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 14/35] s390x/tcg: Implement VECTOR FP ADD Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 15/35] s390x/tcg: Implement VECTOR FP COMPARE (AND SIGNAL) SCALAR Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 16/35] s390x/tcg: Implement VECTOR FP COMPARE (EQUAL|HIGH|HIGH OR EQUAL) Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 17/35] s390x/tcg: Implement VECTOR FP CONVERT FROM FIXED 64-BIT Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 18/35] s390x/tcg: Implement VECTOR FP CONVERT FROM LOGICAL 64-BIT Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 19/35] s390x/tcg: Implement VECTOR FP CONVERT TO FIXED 64-BIT Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 20/35] s390x/tcg: Implement VECTOR FP CONVERT TO LOGICAL 64-BIT Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 21/35] s390x/tcg: Implement VECTOR FP DIVIDE Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 22/35] s390x/tcg: Implement VECTOR LOAD FP INTEGER Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 23/35] s390x/tcg: Implement VECTOR LOAD LENGTHENED Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 24/35] s390x/tcg: Implement VECTOR LOAD ROUNDED Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 25/35] s390x/tcg: Implement VECTOR FP MULTIPLY Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 26/35] s390x/tcg: Implement VECTOR FP MULTIPLY AND (ADD|SUBTRACT) Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 27/35] s390x/tcg: Implement VECTOR FP PERFORM SIGN OPERATION Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 28/35] s390x/tcg: Implement VECTOR FP SQUARE ROOT Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 29/35] s390x/tcg: Implement VECTOR FP SUBTRACT Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 30/35] s390x/tcg: Implement VECTOR FP TEST DATA CLASS IMMEDIATE Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 31/35] s390x/tcg: Allow linux-user to use vector instructions Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 32/35] s390x/tcg: We support the Vector Facility Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 33/35] s390x: Bump the "qemu" CPU model up to a stripped-down z13 Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 34/35] s390x/tcg: Use tcg_gen_gvec_bitsel for VECTOR SELECT Cornelia Huck
2019-06-07  9:52 ` [Qemu-devel] [PULL 35/35] linux-user: elf: ELF_HWCAP for s390x Cornelia Huck
2019-06-07  9:57 ` [Qemu-devel] [PULL 00/35] s390x updates Peter Maydell
2019-06-07  9:58   ` Peter Maydell
2019-06-07 10:02     ` Cornelia Huck
2019-06-07 10:06     ` Cornelia Huck
2019-06-07 13:00     ` Cornelia Huck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190607095237.11364-8-cohuck@redhat.com \
    --to=cohuck@redhat.com \
    --cc=david@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-s390x@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.