All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Hildenbrand <david@redhat.com>
To: qemu-devel@nongnu.org
Cc: Thomas Huth <thuth@redhat.com>,
	David Hildenbrand <david@redhat.com>,
	Cornelia Huck <cohuck@redhat.com>,
	Richard Henderson <richard.henderson@linaro.org>,
	qemu-s390x@nongnu.org, Richard Henderson <rth@twiddle.net>
Subject: [Qemu-devel] [PULL SUBSYSTEM s390x 05/33] s390x/tcg: Implement VECTOR STRING RANGE COMPARE
Date: Wed,  5 Jun 2019 22:33:35 +0200	[thread overview]
Message-ID: <20190605203403.29461-6-david@redhat.com> (raw)
In-Reply-To: <20190605203403.29461-1-david@redhat.com>

Unfortunately, there is no easy way to avoid looping over all elements
in v2. Provide specialized variants for each of the four combinations of
cc and rt (!cc/!rt, !cc/rt, cc/!rt, cc/rt) and for all element types.
Especially for different values of rt, the compiler might be able to
optimize the code a lot.

Add s390_vec_write_element().

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h            |  12 +++
 target/s390x/insn-data.def       |   2 +
 target/s390x/translate_vx.inc.c  |  59 +++++++++++++
 target/s390x/vec.h               |  21 +++++
 target/s390x/vec_string_helper.c | 143 +++++++++++++++++++++++++++++++
 5 files changed, 237 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 1f9f0b463b..5db67779d3 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -236,6 +236,18 @@ DEF_HELPER_FLAGS_3(gvec_vistr32, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
 DEF_HELPER_4(gvec_vistr_cc8, void, ptr, cptr, env, i32)
 DEF_HELPER_4(gvec_vistr_cc16, void, ptr, cptr, env, i32)
 DEF_HELPER_4(gvec_vistr_cc32, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc_rt8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc_rt16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc_rt32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_6(gvec_vstrc_cc8, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc16, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc32, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index b4a6b59608..a2969fab58 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1201,6 +1201,8 @@
     F(0xe781, VFENE,   VRR_b, V,   0, 0, 0, 0, vfene, 0, IF_VEC)
 /* VECTOR ISOLATE STRING */
     F(0xe75c, VISTR,   VRR_a, V,   0, 0, 0, 0, vistr, 0, IF_VEC)
+/* VECTOR STRING RANGE COMPARE */
+    F(0xe78a, VSTRC,   VRR_d, V,   0, 0, 0, 0, vstrc, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 08a62eab52..f26ffa2895 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -217,6 +217,10 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
     tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                        vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                        16, 16, data, fn)
+#define gen_gvec_4_ptr(v1, v2, v3, v4, ptr, data, fn) \
+    tcg_gen_gvec_4_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
+                       vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
+                       ptr, 16, 16, data, fn)
 #define gen_gvec_dup_i64(es, v1, c) \
     tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c)
 #define gen_gvec_mov(v1, v2) \
@@ -2479,3 +2483,58 @@ static DisasJumpType op_vistr(DisasContext *s, DisasOps *o)
     }
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s->fields, m5);
+    const uint8_t m6 = get_field(s->fields, m6);
+    static gen_helper_gvec_4 * const g[3] = {
+        gen_helper_gvec_vstrc8,
+        gen_helper_gvec_vstrc16,
+        gen_helper_gvec_vstrc32,
+    };
+    static gen_helper_gvec_4 * const g_rt[3] = {
+        gen_helper_gvec_vstrc_rt8,
+        gen_helper_gvec_vstrc_rt16,
+        gen_helper_gvec_vstrc_rt32,
+    };
+    static gen_helper_gvec_4_ptr * const g_cc[3] = {
+        gen_helper_gvec_vstrc_cc8,
+        gen_helper_gvec_vstrc_cc16,
+        gen_helper_gvec_vstrc_cc32,
+    };
+    static gen_helper_gvec_4_ptr * const g_cc_rt[3] = {
+        gen_helper_gvec_vstrc_cc_rt8,
+        gen_helper_gvec_vstrc_cc_rt16,
+        gen_helper_gvec_vstrc_cc_rt32,
+    };
+
+    if (es > ES_32) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m6, 0, 1)) {
+        if (extract32(m6, 2, 1)) {
+            gen_gvec_4_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                           get_field(s->fields, v3), get_field(s->fields, v4),
+                           cpu_env, m6, g_cc_rt[es]);
+        } else {
+            gen_gvec_4_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                           get_field(s->fields, v3), get_field(s->fields, v4),
+                           cpu_env, m6, g_cc[es]);
+        }
+        set_cc_static(s);
+    } else {
+        if (extract32(m6, 2, 1)) {
+            gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+                           get_field(s->fields, v3), get_field(s->fields, v4),
+                           m6, g_rt[es]);
+        } else {
+            gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+                           get_field(s->fields, v3), get_field(s->fields, v4),
+                           m6, g[es]);
+        }
+    }
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec.h b/target/s390x/vec.h
index affc62874c..a6e361869b 100644
--- a/target/s390x/vec.h
+++ b/target/s390x/vec.h
@@ -117,4 +117,25 @@ static inline void s390_vec_write_element64(S390Vector *v, uint8_t enr,
     v->doubleword[enr] = data;
 }
 
+static inline void s390_vec_write_element(S390Vector *v, uint8_t enr,
+                                          uint8_t es, uint64_t data)
+{
+    switch (es) {
+    case MO_8:
+        s390_vec_write_element8(v, enr, data);
+        break;
+    case MO_16:
+        s390_vec_write_element16(v, enr, data);
+        break;
+    case MO_32:
+        s390_vec_write_element32(v, enr, data);
+        break;
+    case MO_64:
+        s390_vec_write_element64(v, enr, data);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 #endif /* S390X_VEC_H */
diff --git a/target/s390x/vec_string_helper.c b/target/s390x/vec_string_helper.c
index 6bafa23bd7..c516c0ceeb 100644
--- a/target/s390x/vec_string_helper.c
+++ b/target/s390x/vec_string_helper.c
@@ -328,3 +328,146 @@ void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \
 DEF_VISTR_CC_HELPER(8)
 DEF_VISTR_CC_HELPER(16)
 DEF_VISTR_CC_HELPER(32)
+
+static bool element_compare(uint32_t data, uint32_t l, uint8_t c)
+{
+    const bool equal = extract32(c, 7, 1);
+    const bool lower = extract32(c, 6, 1);
+    const bool higher = extract32(c, 5, 1);
+
+    if (data < l) {
+        return lower;
+    } else if (data > l) {
+        return higher;
+    }
+    return equal;
+}
+
+static int vstrc(void *v1, const void *v2, const void *v3, const void *v4,
+                 bool in, bool rt, bool zs, uint8_t es)
+{
+    const uint64_t mask = get_element_lsbs_mask(es);
+    uint64_t a0 = s390_vec_read_element64(v2, 0);
+    uint64_t a1 = s390_vec_read_element64(v2, 1);
+    int first_zero = 16, first_match = 16;
+    S390Vector rt_result = {};
+    uint64_t z0, z1;
+    int i, j;
+
+    if (zs) {
+        z0 = zero_search(a0, mask);
+        z1 = zero_search(a1, mask);
+        first_zero = match_index(z0, z1);
+    }
+
+    for (i = 0; i < 16 / (1 << es); i++) {
+        const uint32_t data = s390_vec_read_element(v2, i, es);
+        const int cur_byte = i * (1 << es);
+        bool any_match = false;
+
+        /* if we don't need a bit vector, we can stop early */
+        if (cur_byte == first_zero && !rt) {
+            break;
+        }
+
+        for (j = 0; j < 16 / (1 << es); j += 2) {
+            const uint32_t l1 = s390_vec_read_element(v3, j, es);
+            const uint32_t l2 = s390_vec_read_element(v3, j + 1, es);
+            /* we are only interested in the highest byte of each element */
+            const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es));
+            const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es));
+
+            if (element_compare(data, l1, c1) &&
+                element_compare(data, l2, c2)) {
+                any_match = true;
+                break;
+            }
+        }
+        /* invert the result if requested */
+        any_match = in ^ any_match;
+
+        if (any_match) {
+            /* indicate bit vector if requested */
+            if (rt) {
+                const uint64_t val = -1ull;
+
+                first_match = MIN(cur_byte, first_match);
+                s390_vec_write_element(&rt_result, i, es, val);
+            } else {
+                /* stop on the first match */
+                first_match = cur_byte;
+                break;
+            }
+        }
+    }
+
+    if (rt) {
+        *(S390Vector *)v1 = rt_result;
+    } else {
+        s390_vec_write_element64(v1, 0, MIN(first_match, first_zero));
+        s390_vec_write_element64(v1, 1, 0);
+    }
+
+    if (first_zero == 16 && first_match == 16) {
+        return 3; /* no match */
+    } else if (first_zero == 16) {
+        return 1; /* matching elements, no match for zero */
+    } else if (first_match < first_zero) {
+        return 2; /* matching elements before match for zero */
+    }
+    return 0; /* match for zero */
+}
+
+#define DEF_VSTRC_HELPER(BITS)                                                 \
+void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3,        \
+                              const void *v4, uint32_t desc)                   \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1);                          \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS);                               \
+}
+DEF_VSTRC_HELPER(8)
+DEF_VSTRC_HELPER(16)
+DEF_VSTRC_HELPER(32)
+
+#define DEF_VSTRC_RT_HELPER(BITS)                                              \
+void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3,     \
+                                 const void *v4, uint32_t desc)                \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1);                          \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS);                               \
+}
+DEF_VSTRC_RT_HELPER(8)
+DEF_VSTRC_RT_HELPER(16)
+DEF_VSTRC_RT_HELPER(32)
+
+#define DEF_VSTRC_CC_HELPER(BITS)                                              \
+void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3,     \
+                                 const void *v4, CPUS390XState *env,           \
+                                 uint32_t desc)                                \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1);                          \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS);                  \
+}
+DEF_VSTRC_CC_HELPER(8)
+DEF_VSTRC_CC_HELPER(16)
+DEF_VSTRC_CC_HELPER(32)
+
+#define DEF_VSTRC_CC_RT_HELPER(BITS)                                           \
+void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3,  \
+                                    const void *v4, CPUS390XState *env,        \
+                                    uint32_t desc)                             \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1);                          \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS);                  \
+}
+DEF_VSTRC_CC_RT_HELPER(8)
+DEF_VSTRC_CC_RT_HELPER(16)
+DEF_VSTRC_CC_RT_HELPER(32)
-- 
2.21.0



  parent reply	other threads:[~2019-06-05 20:42 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-05 20:33 [Qemu-devel] [PULL SUBSYSTEM s390x 00/33] s390x/tcg: Final Vector Instruction Support David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 01/33] s390x/tcg: Implement VECTOR FIND ANY ELEMENT EQUAL David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 02/33] s390x/tcg: Implement VECTOR FIND " David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 03/33] s390x/tcg: Implement VECTOR FIND ELEMENT NOT EQUAL David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 04/33] s390x/tcg: Implement VECTOR ISOLATE STRING David Hildenbrand
2019-06-05 20:33 ` David Hildenbrand [this message]
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 06/33] s390x: Align vector registers to 16 bytes David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 07/33] s390x: Use uint64_t for vector registers David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 08/33] s390x/tcg: Fix max_byte detection for stfle David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 09/33] s390x/tcg: Store only the necessary amount of doublewords for STFLE David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 10/33] s390x/tcg: Introduce tcg_s390_vector_exception() David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 11/33] s390x/tcg: Export float_comp_to_cc() and float(32|64|128)_dcmask() David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 12/33] s390x/tcg: Implement VECTOR FP ADD David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 13/33] s390x/tcg: Implement VECTOR FP COMPARE (AND SIGNAL) SCALAR David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 14/33] s390x/tcg: Implement VECTOR FP COMPARE (EQUAL|HIGH|HIGH OR EQUAL) David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 15/33] s390x/tcg: Implement VECTOR FP CONVERT FROM FIXED 64-BIT David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 16/33] s390x/tcg: Implement VECTOR FP CONVERT FROM LOGICAL 64-BIT David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 17/33] s390x/tcg: Implement VECTOR FP CONVERT TO FIXED 64-BIT David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 18/33] s390x/tcg: Implement VECTOR FP CONVERT TO LOGICAL 64-BIT David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 19/33] s390x/tcg: Implement VECTOR FP DIVIDE David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 20/33] s390x/tcg: Implement VECTOR LOAD FP INTEGER David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 21/33] s390x/tcg: Implement VECTOR LOAD LENGTHENED David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 22/33] s390x/tcg: Implement VECTOR LOAD ROUNDED David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 23/33] s390x/tcg: Implement VECTOR FP MULTIPLY David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 24/33] s390x/tcg: Implement VECTOR FP MULTIPLY AND (ADD|SUBTRACT) David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 25/33] s390x/tcg: Implement VECTOR FP PERFORM SIGN OPERATION David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 26/33] s390x/tcg: Implement VECTOR FP SQUARE ROOT David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 27/33] s390x/tcg: Implement VECTOR FP SUBTRACT David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 28/33] s390x/tcg: Implement VECTOR FP TEST DATA CLASS IMMEDIATE David Hildenbrand
2019-06-05 20:33 ` [Qemu-devel] [PULL SUBSYSTEM s390x 29/33] s390x/tcg: Allow linux-user to use vector instructions David Hildenbrand
2019-06-05 20:34 ` [Qemu-devel] [PULL SUBSYSTEM s390x 30/33] s390x/tcg: We support the Vector Facility David Hildenbrand
2019-06-05 20:34 ` [Qemu-devel] [PULL SUBSYSTEM s390x 31/33] s390x: Bump the "qemu" CPU model up to a stripped-down z13 David Hildenbrand
2019-06-05 20:34 ` [Qemu-devel] [PULL SUBSYSTEM s390x 32/33] s390x/tcg: Use tcg_gen_gvec_bitsel for VECTOR SELECT David Hildenbrand
2019-06-05 20:34 ` [Qemu-devel] [PULL SUBSYSTEM s390x 33/33] linux-user: elf: ELF_HWCAP for s390x David Hildenbrand
2019-06-06 15:45 ` [Qemu-devel] [PULL SUBSYSTEM s390x 00/33] s390x/tcg: Final Vector Instruction Support Cornelia Huck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190605203403.29461-6-david@redhat.com \
    --to=david@redhat.com \
    --cc=cohuck@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-s390x@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=rth@twiddle.net \
    --cc=thuth@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.