All of lore.kernel.org
 help / color / mirror / Atom feed
From: Cornelia Huck <cohuck@redhat.com>
To: Peter Maydell <peter.maydell@linaro.org>
Cc: qemu-s390x@nongnu.org,
	Richard Henderson <richard.henderson@linaro.org>,
	qemu-devel@nongnu.org, David Hildenbrand <david@redhat.com>
Subject: [Qemu-devel] [PULL v2 04/34] s390x/tcg: Implement VECTOR FIND ELEMENT NOT EQUAL
Date: Fri,  7 Jun 2019 16:16:57 +0200	[thread overview]
Message-ID: <20190607141727.29018-5-cohuck@redhat.com> (raw)
In-Reply-To: <20190607141727.29018-1-cohuck@redhat.com>

From: David Hildenbrand <david@redhat.com>

Similar to VECTOR FIND ELEMENT EQUAL. Core logic courtesy of Richard H.

Add s390_vec_read_element(), which reads an element of a given element size.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h            |  6 +++
 target/s390x/insn-data.def       |  2 +
 target/s390x/translate_vx.inc.c  | 31 +++++++++++++
 target/s390x/vec.h               | 19 ++++++++
 target/s390x/vec_string_helper.c | 74 ++++++++++++++++++++++++++++++++
 5 files changed, 132 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index a1b169b666e9..fb50b404db04 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -224,6 +224,12 @@ DEF_HELPER_FLAGS_4(gvec_vfee32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_5(gvec_vfee_cc8, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_5(gvec_vfee_cc16, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_5(gvec_vfee_cc32, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfene8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfene16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfene32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_5(gvec_vfene_cc8, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfene_cc16, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfene_cc32, void, ptr, cptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index d8907ef6a575..d03c1ee0b3ab 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1197,6 +1197,8 @@
     F(0xe782, VFAE,    VRR_b, V,   0, 0, 0, 0, vfae, 0, IF_VEC)
 /* VECTOR FIND ELEMENT EQUAL */
     F(0xe780, VFEE,    VRR_b, V,   0, 0, 0, 0, vfee, 0, IF_VEC)
+/* VECTOR FIND ELEMENT NOT EQUAL */
+    F(0xe781, VFENE,   VRR_b, V,   0, 0, 0, 0, vfene, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index b25afbc011b3..1ad0b6251721 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2414,3 +2414,34 @@ static DisasJumpType op_vfee(DisasContext *s, DisasOps *o)
     }
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vfene(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s->fields, m4);
+    const uint8_t m5 = get_field(s->fields, m5);
+    static gen_helper_gvec_3 * const g[3] = {
+        gen_helper_gvec_vfene8,
+        gen_helper_gvec_vfene16,
+        gen_helper_gvec_vfene32,
+    };
+    static gen_helper_gvec_3_ptr * const g_cc[3] = {
+        gen_helper_gvec_vfene_cc8,
+        gen_helper_gvec_vfene_cc16,
+        gen_helper_gvec_vfene_cc32,
+    };
+
+    if (es > ES_32 || m5 & ~0x3) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m5, 0, 1)) {
+        gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                       get_field(s->fields, v3), cpu_env, m5, g_cc[es]);
+        set_cc_static(s);
+    } else {
+        gen_gvec_3_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+                       get_field(s->fields, v3), m5, g[es]);
+    }
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec.h b/target/s390x/vec.h
index 3313fb43ee75..affc62874cae 100644
--- a/target/s390x/vec.h
+++ b/target/s390x/vec.h
@@ -12,6 +12,8 @@
 #ifndef S390X_VEC_H
 #define S390X_VEC_H
 
+#include "tcg/tcg.h"
+
 typedef union S390Vector {
     uint64_t doubleword[2];
     uint32_t word[4];
@@ -70,6 +72,23 @@ static inline uint64_t s390_vec_read_element64(const S390Vector *v, uint8_t enr)
     return v->doubleword[enr];
 }
 
+static inline uint64_t s390_vec_read_element(const S390Vector *v, uint8_t enr,
+                                             uint8_t es)
+{
+    switch (es) {
+    case MO_8:
+        return s390_vec_read_element8(v, enr);
+    case MO_16:
+        return s390_vec_read_element16(v, enr);
+    case MO_32:
+        return s390_vec_read_element32(v, enr);
+    case MO_64:
+        return s390_vec_read_element64(v, enr);
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static inline void s390_vec_write_element8(S390Vector *v, uint8_t enr,
                                            uint8_t data)
 {
diff --git a/target/s390x/vec_string_helper.c b/target/s390x/vec_string_helper.c
index 05ad99e17360..0ee3470112b5 100644
--- a/target/s390x/vec_string_helper.c
+++ b/target/s390x/vec_string_helper.c
@@ -27,6 +27,15 @@ static inline uint64_t zero_search(uint64_t a, uint64_t mask)
     return ~(((a & mask) + mask) | a | mask);
 }
 
+/*
+ * Returns a value with the MSB of each nonzero element set and all other
+ * bits clear; the element boundaries are defined by the mask.
+ */
+static inline uint64_t nonzero_search(uint64_t a, uint64_t mask)
+{
+    return (((a & mask) + mask) | a) & ~mask;
+}
+
 /*
  * Returns the byte offset for the first match, or 16 for no match.
  */
@@ -209,3 +218,68 @@ void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3,      \
 DEF_VFEE_CC_HELPER(8)
 DEF_VFEE_CC_HELPER(16)
 DEF_VFEE_CC_HELPER(32)
+
+static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
+{
+    const uint64_t mask = get_element_lsbs_mask(es);
+    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
+    uint64_t first_zero = 16;
+    uint64_t first_inequal;
+    bool smaller = false;
+
+    a0 = s390_vec_read_element64(v2, 0);
+    a1 = s390_vec_read_element64(v2, 1);
+    b0 = s390_vec_read_element64(v3, 0);
+    b1 = s390_vec_read_element64(v3, 1);
+    e0 = nonzero_search(a0 ^ b0, mask);
+    e1 = nonzero_search(a1 ^ b1, mask);
+    first_inequal = match_index(e0, e1);
+
+    /* identify the smaller element */
+    if (first_inequal < 16) {
+        uint8_t enr = first_inequal / (1 << es);
+        uint32_t a = s390_vec_read_element(v2, enr, es);
+        uint32_t b = s390_vec_read_element(v3, enr, es);
+
+        smaller = a < b;
+    }
+
+    if (zs) {
+        z0 = zero_search(a0, mask);
+        z1 = zero_search(a1, mask);
+        first_zero = match_index(z0, z1);
+    }
+
+    s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero));
+    s390_vec_write_element64(v1, 1, 0);
+    if (first_zero == 16 && first_inequal == 16) {
+        return 3;
+    } else if (first_zero < first_inequal) {
+        return 0;
+    }
+    return smaller ? 1 : 2;
+}
+
+#define DEF_VFENE_HELPER(BITS)                                                 \
+void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3,        \
+                              uint32_t desc)                                   \
+{                                                                              \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    vfene(v1, v2, v3, zs, MO_##BITS);                                          \
+}
+DEF_VFENE_HELPER(8)
+DEF_VFENE_HELPER(16)
+DEF_VFENE_HELPER(32)
+
+#define DEF_VFENE_CC_HELPER(BITS)                                              \
+void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3,     \
+                                 CPUS390XState *env, uint32_t desc)            \
+{                                                                              \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS);                             \
+}
+DEF_VFENE_CC_HELPER(8)
+DEF_VFENE_CC_HELPER(16)
+DEF_VFENE_CC_HELPER(32)
-- 
2.20.1



  parent reply	other threads:[~2019-06-07 17:06 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-07 14:16 [Qemu-devel] [PULL v2 00/34] s390x updates Cornelia Huck
2019-06-07 14:16 ` [Qemu-devel] [PULL v2 01/34] MAINTAINERS: cover tests/migration/s390x/ Cornelia Huck
2019-06-07 14:16 ` [Qemu-devel] [PULL v2 02/34] s390x/tcg: Implement VECTOR FIND ANY ELEMENT EQUAL Cornelia Huck
2019-06-07 14:16 ` [Qemu-devel] [PULL v2 03/34] s390x/tcg: Implement VECTOR FIND " Cornelia Huck
2019-06-07 14:16 ` Cornelia Huck [this message]
2019-06-07 14:16 ` [Qemu-devel] [PULL v2 05/34] s390x/tcg: Implement VECTOR ISOLATE STRING Cornelia Huck
2019-06-07 14:16 ` [Qemu-devel] [PULL v2 06/34] s390x/tcg: Implement VECTOR STRING RANGE COMPARE Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 07/34] s390x: Align vector registers to 16 bytes Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 08/34] s390x: Use uint64_t for vector registers Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 09/34] s390x/tcg: Fix max_byte detection for stfle Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 10/34] s390x/tcg: Store only the necessary amount of doublewords for STFLE Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 11/34] s390x/tcg: Introduce tcg_s390_vector_exception() Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 12/34] s390x/tcg: Export float_comp_to_cc() and float(32|64|128)_dcmask() Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 13/34] s390x/tcg: Implement VECTOR FP ADD Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 14/34] s390x/tcg: Implement VECTOR FP COMPARE (AND SIGNAL) SCALAR Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 15/34] s390x/tcg: Implement VECTOR FP COMPARE (EQUAL|HIGH|HIGH OR EQUAL) Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 16/34] s390x/tcg: Implement VECTOR FP CONVERT FROM FIXED 64-BIT Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 17/34] s390x/tcg: Implement VECTOR FP CONVERT FROM LOGICAL 64-BIT Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 18/34] s390x/tcg: Implement VECTOR FP CONVERT TO FIXED 64-BIT Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 19/34] s390x/tcg: Implement VECTOR FP CONVERT TO LOGICAL 64-BIT Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 20/34] s390x/tcg: Implement VECTOR FP DIVIDE Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 21/34] s390x/tcg: Implement VECTOR LOAD FP INTEGER Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 22/34] s390x/tcg: Implement VECTOR LOAD LENGTHENED Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 23/34] s390x/tcg: Implement VECTOR LOAD ROUNDED Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 24/34] s390x/tcg: Implement VECTOR FP MULTIPLY Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 25/34] s390x/tcg: Implement VECTOR FP MULTIPLY AND (ADD|SUBTRACT) Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 26/34] s390x/tcg: Implement VECTOR FP PERFORM SIGN OPERATION Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 27/34] s390x/tcg: Implement VECTOR FP SQUARE ROOT Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 28/34] s390x/tcg: Implement VECTOR FP SUBTRACT Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 29/34] s390x/tcg: Implement VECTOR FP TEST DATA CLASS IMMEDIATE Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 30/34] s390x/tcg: Allow linux-user to use vector instructions Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 31/34] s390x/tcg: We support the Vector Facility Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 32/34] s390x: Bump the "qemu" CPU model up to a stripped-down z13 Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 33/34] s390x/tcg: Use tcg_gen_gvec_bitsel for VECTOR SELECT Cornelia Huck
2019-06-07 14:17 ` [Qemu-devel] [PULL v2 34/34] linux-user: elf: ELF_HWCAP for s390x Cornelia Huck
2019-06-07 15:14 ` [Qemu-devel] [PULL v2 00/34] s390x updates Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190607141727.29018-5-cohuck@redhat.com \
    --to=cohuck@redhat.com \
    --cc=david@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-s390x@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.