From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PULL 40/52] target/arm: Convert PMULL.64 to gvec
Date: Fri, 21 Feb 2020 13:07:28 +0000 [thread overview]
Message-ID: <20200221130740.7583-41-peter.maydell@linaro.org> (raw)
In-Reply-To: <20200221130740.7583-1-peter.maydell@linaro.org>
From: Richard Henderson <richard.henderson@linaro.org>
The gvec form will be needed for implementing SVE2.
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200216214232.4230-4-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/helper.h | 4 +---
target/arm/neon_helper.c | 30 ------------------------------
target/arm/translate-a64.c | 28 +++-------------------------
target/arm/translate.c | 16 ++--------------
target/arm/vec_helper.c | 33 +++++++++++++++++++++++++++++++++
5 files changed, 39 insertions(+), 72 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 82450a3f965..4352fae3dbf 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -562,9 +562,6 @@ DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_2(dc_zva, void, env, i64)
-DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
-
DEF_HELPER_FLAGS_5(gvec_qrdmlah_s16, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s16, TCG_CALL_NO_RWG,
@@ -696,6 +693,7 @@ DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_pmul_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
#ifdef TARGET_AARCH64
#include "helper-a64.h"
diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c
index 9e7a9a1ac54..6a107da0e11 100644
--- a/target/arm/neon_helper.c
+++ b/target/arm/neon_helper.c
@@ -2152,33 +2152,3 @@ void HELPER(neon_zip16)(void *vd, void *vm)
rm[0] = m0;
rd[0] = d0;
}
-
-/* Helper function for 64 bit polynomial multiply case:
- * perform PolynomialMult(op1, op2) and return either the top or
- * bottom half of the 128 bit result.
- */
-uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
-{
- int bitnum;
- uint64_t res = 0;
-
- for (bitnum = 0; bitnum < 64; bitnum++) {
- if (op1 & (1ULL << bitnum)) {
- res ^= op2 << bitnum;
- }
- }
- return res;
-}
-uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
-{
- int bitnum;
- uint64_t res = 0;
-
- /* bit 0 of op1 can't influence the high 64 bits at all */
- for (bitnum = 1; bitnum < 64; bitnum++) {
- if (op1 & (1ULL << bitnum)) {
- res ^= op2 >> (64 - bitnum);
- }
- }
- return res;
-}
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index a4fbb18a535..03ce879497d 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -10657,30 +10657,6 @@ static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
clear_vec_high(s, is_q, rd);
}
-static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
-{
- /* PMULL of 64 x 64 -> 128 is an odd special case because it
- * is the only three-reg-diff instruction which produces a
- * 128-bit wide result from a single operation. However since
- * it's possible to calculate the two halves more or less
- * separately we just use two helper calls.
- */
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
- TCGv_i64 tcg_res = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_op1, rn, is_q, MO_64);
- read_vec_element(s, tcg_op2, rm, is_q, MO_64);
- gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
- write_vec_element(s, tcg_res, rd, 0, MO_64);
- gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
- write_vec_element(s, tcg_res, rd, 1, MO_64);
-
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2);
- tcg_temp_free_i64(tcg_res);
-}
-
/* AdvSIMD three different
* 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
* +---+---+---+-----------+------+---+------+--------+-----+------+------+
@@ -10745,7 +10721,9 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
if (!fp_access_check(s)) {
return;
}
- handle_pmull_64(s, is_q, rd, rn, rm);
+ /* The Q field specifies lo/hi half input for this insn. */
+ gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
+ gen_helper_gvec_pmull_q);
return;
}
goto is_widening;
diff --git a/target/arm/translate.c b/target/arm/translate.c
index e8f79899ca7..57d61c4aa57 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -5870,23 +5870,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
* outside the loop below as it only performs a single pass.
*/
if (op == 14 && size == 2) {
- TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
-
if (!dc_isar_feature(aa32_pmull, s)) {
return 1;
}
- tcg_rn = tcg_temp_new_i64();
- tcg_rm = tcg_temp_new_i64();
- tcg_rd = tcg_temp_new_i64();
- neon_load_reg64(tcg_rn, rn);
- neon_load_reg64(tcg_rm, rm);
- gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
- neon_store_reg64(tcg_rd, rd);
- gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
- neon_store_reg64(tcg_rd, rd + 1);
- tcg_temp_free_i64(tcg_rn);
- tcg_temp_free_i64(tcg_rm);
- tcg_temp_free_i64(tcg_rd);
+ tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
+ 0, gen_helper_gvec_pmull_q);
return 0;
}
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 854de0e2795..79d2624f7b1 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -1164,3 +1164,36 @@ void HELPER(gvec_pmul_b)(void *vd, void *vn, void *vm, uint32_t desc)
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
+
+/*
+ * 64x64->128 polynomial multiply.
+ * Because of the lanes are not accessed in strict columns,
+ * this probably cannot be turned into a generic helper.
+ */
+void HELPER(gvec_pmull_q)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ intptr_t hi = simd_data(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 8; i += 2) {
+ uint64_t nn = n[i + hi];
+ uint64_t mm = m[i + hi];
+ uint64_t rhi = 0;
+ uint64_t rlo = 0;
+
+ /* Bit 0 can only influence the low 64-bit result. */
+ if (nn & 1) {
+ rlo = mm;
+ }
+
+ for (j = 1; j < 64; ++j) {
+ uint64_t mask = -((nn >> j) & 1);
+ rlo ^= (mm << j) & mask;
+ rhi ^= (mm >> (64 - j)) & mask;
+ }
+ d[i] = rlo;
+ d[i + 1] = rhi;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
--
2.20.1
next prev parent reply other threads:[~2020-02-21 13:37 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-02-21 13:06 [PULL 00/52] target-arm queue Peter Maydell
2020-02-21 13:06 ` [PULL 01/52] aspeed/scu: Create separate write callbacks Peter Maydell
2020-02-21 13:06 ` [PULL 02/52] aspeed/scu: Implement chip ID register Peter Maydell
2020-02-21 13:06 ` [PULL 03/52] hw/misc/iotkit-secctl: Fix writing to 'PPC Interrupt Clear' register Peter Maydell
2020-02-21 13:06 ` [PULL 04/52] mainstone: Make providing flash images non-mandatory Peter Maydell
2020-02-21 13:06 ` [PULL 05/52] z2: " Peter Maydell
2020-02-21 13:06 ` [PULL 06/52] target/arm: Flush high bits of sve register after AdvSIMD EXT Peter Maydell
2020-02-21 13:06 ` [PULL 07/52] target/arm: Flush high bits of sve register after AdvSIMD TBL/TBX Peter Maydell
2020-02-21 13:06 ` [PULL 08/52] target/arm: Flush high bits of sve register after AdvSIMD ZIP/UZP/TRN Peter Maydell
2020-02-21 13:06 ` [PULL 09/52] target/arm: Flush high bits of sve register after AdvSIMD INS Peter Maydell
2020-02-21 13:06 ` [PULL 10/52] target/arm: Use bit 55 explicitly for pauth Peter Maydell
2020-02-21 13:06 ` [PULL 11/52] target/arm: Fix select for aa64_va_parameters_both Peter Maydell
2020-02-21 13:07 ` [PULL 12/52] target/arm: Remove ttbr1_valid check from get_phys_addr_lpae Peter Maydell
2020-02-21 13:07 ` [PULL 13/52] target/arm: Split out aa64_va_parameter_tbi, aa64_va_parameter_tbid Peter Maydell
2020-02-21 13:07 ` [PULL 14/52] target/arm: Add _aa32_ to isar_feature functions testing 32-bit ID registers Peter Maydell
2020-02-21 13:07 ` [PULL 15/52] target/arm: Check aa32_pan in take_aarch32_exception(), not aa64_pan Peter Maydell
2020-02-21 13:07 ` [PULL 16/52] target/arm: Add isar_feature_any_fp16 and document naming/usage conventions Peter Maydell
2020-02-21 13:07 ` [PULL 17/52] target/arm: Define and use any_predinv isar_feature test Peter Maydell
2020-02-21 13:07 ` [PULL 18/52] target/arm: Factor out PMU register definitions Peter Maydell
2020-02-21 13:07 ` [PULL 19/52] target/arm: Add and use FIELD definitions for ID_AA64DFR0_EL1 Peter Maydell
2020-02-21 13:07 ` [PULL 20/52] target/arm: Use FIELD macros for clearing ID_DFR0 PERFMON field Peter Maydell
2020-02-21 13:07 ` [PULL 21/52] target/arm: Define an aa32_pmu_8_1 isar feature test function Peter Maydell
2020-02-21 13:07 ` [PULL 22/52] target/arm: Add _aa64_ and _any_ versions of pmu_8_1 isar checks Peter Maydell
2020-02-21 13:07 ` [PULL 23/52] target/arm: Stop assuming DBGDIDR always exists Peter Maydell
2020-02-21 13:07 ` [PULL 24/52] target/arm: Move DBGDIDR into ARMISARegisters Peter Maydell
2020-02-21 13:07 ` [PULL 25/52] target/arm: Read debug-related ID registers from KVM Peter Maydell
2020-02-21 13:07 ` [PULL 26/52] target/arm: Implement ARMv8.1-PMU extension Peter Maydell
2020-02-21 13:07 ` [PULL 27/52] target/arm: Implement ARMv8.4-PMU extension Peter Maydell
2020-02-21 13:07 ` [PULL 28/52] target/arm: Provide ARMv8.4-PMU in '-cpu max' Peter Maydell
2020-02-21 13:07 ` [PULL 29/52] target/arm: Correct definition of PMCRDP Peter Maydell
2020-02-21 13:07 ` [PULL 30/52] target/arm: Correct handling of PMCR_EL0.LC bit Peter Maydell
2020-02-21 13:07 ` [PULL 31/52] target/arm: Test correct register in aa32_pan and aa32_ats1e1 checks Peter Maydell
2020-02-21 13:07 ` [PULL 32/52] target/arm: Use isar_feature function for testing AA32HPD feature Peter Maydell
2020-02-21 13:07 ` [PULL 33/52] target/arm: Use FIELD_EX32 for testing 32-bit fields Peter Maydell
2020-02-21 13:07 ` [PULL 34/52] target/arm: Correctly implement ACTLR2, HACTLR2 Peter Maydell
2020-02-21 13:07 ` [PULL 35/52] hw: usb: hcd-ohci: Move OHCISysBusState and TYPE_SYSBUS_OHCI to include file Peter Maydell
2020-02-21 13:07 ` [PULL 36/52] hcd-ehci: Introduce "companion-enable" sysbus property Peter Maydell
2020-02-21 13:07 ` [PULL 37/52] arm: allwinner: Wire up USB ports Peter Maydell
2020-02-21 13:07 ` [PULL 38/52] target/arm: Vectorize USHL and SSHL Peter Maydell
2020-02-21 13:07 ` [PULL 39/52] target/arm: Convert PMUL.8 to gvec Peter Maydell
2020-02-21 13:07 ` Peter Maydell [this message]
2020-02-21 13:07 ` [PULL 41/52] target/arm: Convert PMULL.8 " Peter Maydell
2020-02-21 13:07 ` [PULL 42/52] xilinx_spips: Correct the number of dummy cycles for the FAST_READ_4 cmd Peter Maydell
2020-02-21 13:07 ` [PULL 43/52] sh4: Fix PCI ISA IO memory subregion Peter Maydell
2020-02-21 13:07 ` [PULL 44/52] target/arm: Rename isar_feature_aa32_simd_r32 Peter Maydell
2020-02-21 13:07 ` [PULL 45/52] target/arm: Use isar_feature_aa32_simd_r32 more places Peter Maydell
2020-02-21 13:07 ` [PULL 46/52] target/arm: Set MVFR0.FPSP for ARMv5 cpus Peter Maydell
2020-02-21 13:07 ` [PULL 47/52] target/arm: Add isar_feature_aa32_simd_r16 Peter Maydell
2020-02-21 13:07 ` [PULL 48/52] target/arm: Rename isar_feature_aa32_fpdp_v2 Peter Maydell
2020-02-21 13:07 ` [PULL 49/52] target/arm: Add isar_feature_aa32_{fpsp_v2, fpsp_v3, fpdp_v3} Peter Maydell
2020-02-21 13:07 ` [PULL 50/52] target/arm: Perform fpdp_v2 check first Peter Maydell
2020-02-21 13:07 ` [PULL 51/52] target/arm: Replace ARM_FEATURE_VFP3 checks with fp{sp, dp}_v3 Peter Maydell
2020-02-21 13:07 ` [PULL 52/52] target/arm: Add missing checks for fpsp_v2 Peter Maydell
2020-02-21 14:17 ` [PULL 00/52] target-arm queue no-reply
2020-02-21 16:06 ` no-reply
2020-02-21 16:10 ` no-reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200221130740.7583-41-peter.maydell@linaro.org \
--to=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).