qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PULL 39/52] target/arm: Convert PMUL.8 to gvec
Date: Fri, 21 Feb 2020 13:07:27 +0000	[thread overview]
Message-ID: <20200221130740.7583-40-peter.maydell@linaro.org> (raw)
In-Reply-To: <20200221130740.7583-1-peter.maydell@linaro.org>

From: Richard Henderson <richard.henderson@linaro.org>

The gvec form will be needed for implementing SVE2.

Extend the implementation to operate on uint64_t instead of uint32_t.
Use a counted inner loop instead of terminating when op1 goes to zero,
in anticipation of the data-independent timing required by ARMv8.4-DIT.

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200216214232.4230-3-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/helper.h        |  3 ++-
 target/arm/neon_helper.c   | 22 ----------------------
 target/arm/translate-a64.c | 10 +++-------
 target/arm/translate.c     | 11 ++++-------
 target/arm/vec_helper.c    | 30 ++++++++++++++++++++++++++++++
 5 files changed, 39 insertions(+), 37 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index 459a278b5c4..82450a3f965 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -342,7 +342,6 @@ DEF_HELPER_2(neon_sub_u8, i32, i32, i32)
 DEF_HELPER_2(neon_sub_u16, i32, i32, i32)
 DEF_HELPER_2(neon_mul_u8, i32, i32, i32)
 DEF_HELPER_2(neon_mul_u16, i32, i32, i32)
-DEF_HELPER_2(neon_mul_p8, i32, i32, i32)
 DEF_HELPER_2(neon_mull_p8, i64, i32, i32)
 
 DEF_HELPER_2(neon_tst_u8, i32, i32, i32)
@@ -696,6 +695,8 @@ DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_4(gvec_pmul_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
 #include "helper-sve.h"
diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c
index c581ffb7d3c..9e7a9a1ac54 100644
--- a/target/arm/neon_helper.c
+++ b/target/arm/neon_helper.c
@@ -1131,28 +1131,6 @@ NEON_VOP(mul_u16, neon_u16, 2)
 
 /* Polynomial multiplication is like integer multiplication except the
    partial products are XORed, not added.  */
-uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2)
-{
-    uint32_t mask;
-    uint32_t result;
-    result = 0;
-    while (op1) {
-        mask = 0;
-        if (op1 & 1)
-            mask |= 0xff;
-        if (op1 & (1 << 8))
-            mask |= (0xff << 8);
-        if (op1 & (1 << 16))
-            mask |= (0xff << 16);
-        if (op1 & (1 << 24))
-            mask |= (0xff << 24);
-        result ^= op2 & mask;
-        op1 = (op1 >> 1) & 0x7f7f7f7f;
-        op2 = (op2 << 1) & 0xfefefefe;
-    }
-    return result;
-}
-
 uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2)
 {
     uint64_t result = 0;
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 9fbcf7d2f95..a4fbb18a535 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -11169,9 +11169,10 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
     case 0x13: /* MUL, PMUL */
         if (!u) { /* MUL */
             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
-            return;
+        } else {  /* PMUL */
+            gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
         }
-        break;
+        return;
     case 0x12: /* MLA, MLS */
         if (u) {
             gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
@@ -11301,11 +11302,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
                 genfn = fns[size][u];
                 break;
             }
-            case 0x13: /* MUL, PMUL */
-                assert(u); /* PMUL */
-                assert(size == 0);
-                genfn = gen_helper_neon_mul_p8;
-                break;
             case 0x16: /* SQDMULH, SQRDMULH */
             {
                 static NeonGenTwoOpEnvFn * const fns[2][2] = {
diff --git a/target/arm/translate.c b/target/arm/translate.c
index a96104d6b42..e8f79899ca7 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -5007,16 +5007,17 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
 
         case NEON_3R_VMUL: /* VMUL */
             if (u) {
-                /* Polynomial case allows only P8 and is handled below.  */
+                /* Polynomial case allows only P8.  */
                 if (size != 0) {
                     return 1;
                 }
+                tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
+                                   0, gen_helper_gvec_pmul_b);
             } else {
                 tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs,
                                  vec_size, vec_size);
-                return 0;
             }
-            break;
+            return 0;
 
         case NEON_3R_VML: /* VMLA, VMLS */
             tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
@@ -5206,10 +5207,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             tmp2 = neon_load_reg(rd, pass);
             gen_neon_add(size, tmp, tmp2);
             break;
-        case NEON_3R_VMUL:
-            /* VMUL.P8; other cases already eliminated.  */
-            gen_helper_neon_mul_p8(tmp, tmp, tmp2);
-            break;
         case NEON_3R_VPMAX:
             GEN_NEON_INTEGER_OP(pmax);
             break;
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index fcb36639036..854de0e2795 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -1134,3 +1134,33 @@ void HELPER(gvec_ushl_h)(void *vd, void *vn, void *vm, uint32_t desc)
     }
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
+
+/*
+ * 8x8->8 polynomial multiply.
+ *
+ * Polynomial multiplication is like integer multiplication except the
+ * partial products are XORed, not added.
+ *
+ * TODO: expose this as a generic vector operation, as it is a common
+ * crypto building block.
+ */
+void HELPER(gvec_pmul_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t i, j, opr_sz = simd_oprsz(desc);
+    uint64_t *d = vd, *n = vn, *m = vm;
+
+    for (i = 0; i < opr_sz / 8; ++i) {
+        uint64_t nn = n[i];
+        uint64_t mm = m[i];
+        uint64_t rr = 0;
+
+        for (j = 0; j < 8; ++j) {
+            uint64_t mask = (nn & 0x0101010101010101ull) * 0xff;
+            rr ^= mm & mask;
+            mm = (mm << 1) & 0xfefefefefefefefeull;
+            nn >>= 1;
+        }
+        d[i] = rr;
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
-- 
2.20.1



  parent reply	other threads:[~2020-02-21 13:32 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-21 13:06 [PULL 00/52] target-arm queue Peter Maydell
2020-02-21 13:06 ` [PULL 01/52] aspeed/scu: Create separate write callbacks Peter Maydell
2020-02-21 13:06 ` [PULL 02/52] aspeed/scu: Implement chip ID register Peter Maydell
2020-02-21 13:06 ` [PULL 03/52] hw/misc/iotkit-secctl: Fix writing to 'PPC Interrupt Clear' register Peter Maydell
2020-02-21 13:06 ` [PULL 04/52] mainstone: Make providing flash images non-mandatory Peter Maydell
2020-02-21 13:06 ` [PULL 05/52] z2: " Peter Maydell
2020-02-21 13:06 ` [PULL 06/52] target/arm: Flush high bits of sve register after AdvSIMD EXT Peter Maydell
2020-02-21 13:06 ` [PULL 07/52] target/arm: Flush high bits of sve register after AdvSIMD TBL/TBX Peter Maydell
2020-02-21 13:06 ` [PULL 08/52] target/arm: Flush high bits of sve register after AdvSIMD ZIP/UZP/TRN Peter Maydell
2020-02-21 13:06 ` [PULL 09/52] target/arm: Flush high bits of sve register after AdvSIMD INS Peter Maydell
2020-02-21 13:06 ` [PULL 10/52] target/arm: Use bit 55 explicitly for pauth Peter Maydell
2020-02-21 13:06 ` [PULL 11/52] target/arm: Fix select for aa64_va_parameters_both Peter Maydell
2020-02-21 13:07 ` [PULL 12/52] target/arm: Remove ttbr1_valid check from get_phys_addr_lpae Peter Maydell
2020-02-21 13:07 ` [PULL 13/52] target/arm: Split out aa64_va_parameter_tbi, aa64_va_parameter_tbid Peter Maydell
2020-02-21 13:07 ` [PULL 14/52] target/arm: Add _aa32_ to isar_feature functions testing 32-bit ID registers Peter Maydell
2020-02-21 13:07 ` [PULL 15/52] target/arm: Check aa32_pan in take_aarch32_exception(), not aa64_pan Peter Maydell
2020-02-21 13:07 ` [PULL 16/52] target/arm: Add isar_feature_any_fp16 and document naming/usage conventions Peter Maydell
2020-02-21 13:07 ` [PULL 17/52] target/arm: Define and use any_predinv isar_feature test Peter Maydell
2020-02-21 13:07 ` [PULL 18/52] target/arm: Factor out PMU register definitions Peter Maydell
2020-02-21 13:07 ` [PULL 19/52] target/arm: Add and use FIELD definitions for ID_AA64DFR0_EL1 Peter Maydell
2020-02-21 13:07 ` [PULL 20/52] target/arm: Use FIELD macros for clearing ID_DFR0 PERFMON field Peter Maydell
2020-02-21 13:07 ` [PULL 21/52] target/arm: Define an aa32_pmu_8_1 isar feature test function Peter Maydell
2020-02-21 13:07 ` [PULL 22/52] target/arm: Add _aa64_ and _any_ versions of pmu_8_1 isar checks Peter Maydell
2020-02-21 13:07 ` [PULL 23/52] target/arm: Stop assuming DBGDIDR always exists Peter Maydell
2020-02-21 13:07 ` [PULL 24/52] target/arm: Move DBGDIDR into ARMISARegisters Peter Maydell
2020-02-21 13:07 ` [PULL 25/52] target/arm: Read debug-related ID registers from KVM Peter Maydell
2020-02-21 13:07 ` [PULL 26/52] target/arm: Implement ARMv8.1-PMU extension Peter Maydell
2020-02-21 13:07 ` [PULL 27/52] target/arm: Implement ARMv8.4-PMU extension Peter Maydell
2020-02-21 13:07 ` [PULL 28/52] target/arm: Provide ARMv8.4-PMU in '-cpu max' Peter Maydell
2020-02-21 13:07 ` [PULL 29/52] target/arm: Correct definition of PMCRDP Peter Maydell
2020-02-21 13:07 ` [PULL 30/52] target/arm: Correct handling of PMCR_EL0.LC bit Peter Maydell
2020-02-21 13:07 ` [PULL 31/52] target/arm: Test correct register in aa32_pan and aa32_ats1e1 checks Peter Maydell
2020-02-21 13:07 ` [PULL 32/52] target/arm: Use isar_feature function for testing AA32HPD feature Peter Maydell
2020-02-21 13:07 ` [PULL 33/52] target/arm: Use FIELD_EX32 for testing 32-bit fields Peter Maydell
2020-02-21 13:07 ` [PULL 34/52] target/arm: Correctly implement ACTLR2, HACTLR2 Peter Maydell
2020-02-21 13:07 ` [PULL 35/52] hw: usb: hcd-ohci: Move OHCISysBusState and TYPE_SYSBUS_OHCI to include file Peter Maydell
2020-02-21 13:07 ` [PULL 36/52] hcd-ehci: Introduce "companion-enable" sysbus property Peter Maydell
2020-02-21 13:07 ` [PULL 37/52] arm: allwinner: Wire up USB ports Peter Maydell
2020-02-21 13:07 ` [PULL 38/52] target/arm: Vectorize USHL and SSHL Peter Maydell
2020-02-21 13:07 ` Peter Maydell [this message]
2020-02-21 13:07 ` [PULL 40/52] target/arm: Convert PMULL.64 to gvec Peter Maydell
2020-02-21 13:07 ` [PULL 41/52] target/arm: Convert PMULL.8 " Peter Maydell
2020-02-21 13:07 ` [PULL 42/52] xilinx_spips: Correct the number of dummy cycles for the FAST_READ_4 cmd Peter Maydell
2020-02-21 13:07 ` [PULL 43/52] sh4: Fix PCI ISA IO memory subregion Peter Maydell
2020-02-21 13:07 ` [PULL 44/52] target/arm: Rename isar_feature_aa32_simd_r32 Peter Maydell
2020-02-21 13:07 ` [PULL 45/52] target/arm: Use isar_feature_aa32_simd_r32 more places Peter Maydell
2020-02-21 13:07 ` [PULL 46/52] target/arm: Set MVFR0.FPSP for ARMv5 cpus Peter Maydell
2020-02-21 13:07 ` [PULL 47/52] target/arm: Add isar_feature_aa32_simd_r16 Peter Maydell
2020-02-21 13:07 ` [PULL 48/52] target/arm: Rename isar_feature_aa32_fpdp_v2 Peter Maydell
2020-02-21 13:07 ` [PULL 49/52] target/arm: Add isar_feature_aa32_{fpsp_v2, fpsp_v3, fpdp_v3} Peter Maydell
2020-02-21 13:07 ` [PULL 50/52] target/arm: Perform fpdp_v2 check first Peter Maydell
2020-02-21 13:07 ` [PULL 51/52] target/arm: Replace ARM_FEATURE_VFP3 checks with fp{sp, dp}_v3 Peter Maydell
2020-02-21 13:07 ` [PULL 52/52] target/arm: Add missing checks for fpsp_v2 Peter Maydell
2020-02-21 14:17 ` [PULL 00/52] target-arm queue no-reply
2020-02-21 16:06 ` no-reply
2020-02-21 16:10 ` no-reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200221130740.7583-40-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).