From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org, qemu-arm@nongnu.org
Subject: [PATCH 2/4] target/arm: Convert PMUL.8 to gvec
Date: Wed, 16 Oct 2019 21:42:30 -0700 [thread overview]
Message-ID: <20191017044232.27601-3-richard.henderson@linaro.org> (raw)
In-Reply-To: <20191017044232.27601-1-richard.henderson@linaro.org>
The gvec form will be needed for implementing SVE2.
Extend the implementation to operate on uint64_t instead of uint32_t.
Use a counted inner loop instead of terminating when op1 goes to zero,
looking toward the data-independent timing required for ARMv8.4-DIT.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper.h | 3 ++-
target/arm/neon_helper.c | 22 ----------------------
target/arm/translate-a64.c | 10 +++-------
target/arm/translate.c | 11 ++++-------
target/arm/vec_helper.c | 30 ++++++++++++++++++++++++++++++
5 files changed, 39 insertions(+), 37 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index fc0d594a14..800446e537 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -335,7 +335,6 @@ DEF_HELPER_2(neon_sub_u8, i32, i32, i32)
DEF_HELPER_2(neon_sub_u16, i32, i32, i32)
DEF_HELPER_2(neon_mul_u8, i32, i32, i32)
DEF_HELPER_2(neon_mul_u16, i32, i32, i32)
-DEF_HELPER_2(neon_mul_p8, i32, i32, i32)
DEF_HELPER_2(neon_mull_p8, i64, i32, i32)
DEF_HELPER_2(neon_tst_u8, i32, i32, i32)
@@ -689,6 +688,8 @@ DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_pmul_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#include "helper-sve.h"
diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c
index c581ffb7d3..9e7a9a1ac5 100644
--- a/target/arm/neon_helper.c
+++ b/target/arm/neon_helper.c
@@ -1131,28 +1131,6 @@ NEON_VOP(mul_u16, neon_u16, 2)
/* Polynomial multiplication is like integer multiplication except the
partial products are XORed, not added. */
-uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2)
-{
- uint32_t mask;
- uint32_t result;
- result = 0;
- while (op1) {
- mask = 0;
- if (op1 & 1)
- mask |= 0xff;
- if (op1 & (1 << 8))
- mask |= (0xff << 8);
- if (op1 & (1 << 16))
- mask |= (0xff << 16);
- if (op1 & (1 << 24))
- mask |= (0xff << 24);
- result ^= op2 & mask;
- op1 = (op1 >> 1) & 0x7f7f7f7f;
- op2 = (op2 << 1) & 0xfefefefe;
- }
- return result;
-}
-
uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2)
{
uint64_t result = 0;
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 255a168df6..04e25cfe06 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -11110,9 +11110,10 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
case 0x13: /* MUL, PMUL */
if (!u) { /* MUL */
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
- return;
+ } else { /* PMUL */
+ gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
}
- break;
+ return;
case 0x12: /* MLA, MLS */
if (u) {
gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
@@ -11242,11 +11243,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genfn = fns[size][u];
break;
}
- case 0x13: /* MUL, PMUL */
- assert(u); /* PMUL */
- assert(size == 0);
- genfn = gen_helper_neon_mul_p8;
- break;
case 0x16: /* SQDMULH, SQRDMULH */
{
static NeonGenTwoOpEnvFn * const fns[2][2] = {
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 598bb1cc00..b66a2f6b71 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -5014,16 +5014,17 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
case NEON_3R_VMUL: /* VMUL */
if (u) {
- /* Polynomial case allows only P8 and is handled below. */
+ /* Polynomial case allows only P8. */
if (size != 0) {
return 1;
}
+ tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
+ 0, gen_helper_gvec_pmul_b);
} else {
tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs,
vec_size, vec_size);
- return 0;
}
- break;
+ return 0;
case NEON_3R_VML: /* VMLA, VMLS */
tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
@@ -5213,10 +5214,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
tmp2 = neon_load_reg(rd, pass);
gen_neon_add(size, tmp, tmp2);
break;
- case NEON_3R_VMUL:
- /* VMUL.P8; other cases already eliminated. */
- gen_helper_neon_mul_p8(tmp, tmp, tmp2);
- break;
case NEON_3R_VPMAX:
GEN_NEON_INTEGER_OP(pmax);
break;
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index fcb3663903..d401282c6f 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -1134,3 +1134,33 @@ void HELPER(gvec_ushl_h)(void *vd, void *vn, void *vm, uint32_t desc)
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
+
+/*
+ * 8x8->8 polynomial multiply.
+ *
+ * Polynomial multiplication is like integer multiplication except the
+ * partial products are XORed, not added.
+ *
+ * TODO: expose this as a generic vector operation, as it is a common
+ * crypto building block.
+ */
+void HELPER(gvec_pmul_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc); /* NOTE(review): presumably the operation size in bytes -- confirm */
+ uint64_t *d = vd, *n = vn, *m = vm; /* operands are accessed as 64-bit words */
+
+ for (i = 0; i < opr_sz / 8; ++i) { /* each uint64_t word holds 8 byte lanes */
+ uint64_t nn = n[i];
+ uint64_t mm = m[i];
+ uint64_t rr = 0;
+
+ for (j = 0; j < 8; ++j) { /* fixed trip count: one pass per bit of each nn byte */
+ uint64_t mask = (nn & 0x0101010101010101ull) * 0xff; /* low bit of each byte -> 0x00 or 0xff */
+ rr ^= mm & mask; /* XOR in this bit's partial product */
+ mm = (mm << 1) & 0xfefefefefefefefeull; /* per-byte shift left; drop bits that crossed a byte boundary */
+ nn = (nn >> 1) & 0x7f7f7f7f7f7f7f7full; /* per-byte shift right; next bit moves to bit 0 */
+ }
+ d[i] = rr;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc)); /* NOTE(review): presumably zeroes bytes from opr_sz up to maxsz -- confirm */
+}
--
2.17.1
next prev parent reply other threads:[~2019-10-17 4:43 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-10-17 4:42 [PATCH 0/4] target/arm vector improvements Richard Henderson
2019-10-17 4:42 ` [PATCH 1/4] target/arm: Vectorize USHL and SSHL Richard Henderson
2019-10-17 16:01 ` Alex Bennée
2019-10-18 14:47 ` Richard Henderson
2019-10-17 4:42 ` Richard Henderson [this message]
2019-10-18 13:40 ` [PATCH 2/4] target/arm: Convert PMUL.8 to gvec Alex Bennée
2019-10-17 4:42 ` [PATCH 3/4] target/arm: Convert PMULL.64 " Richard Henderson
2019-10-18 12:24 ` Alex Bennée
2019-10-18 13:40 ` Alex Bennée
2019-10-17 4:42 ` [PATCH 4/4] target/arm: Convert PMULL.8 " Richard Henderson
2019-10-18 17:54 ` Alex Bennée
2019-10-17 5:21 ` [PATCH 0/4] target/arm vector improvements no-reply
2019-10-18 17:58 ` Alex Bennée
2019-11-18 16:26 ` Peter Maydell
2019-11-18 20:05 ` Richard Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191017044232.27601-3-richard.henderson@linaro.org \
--to=richard.henderson@linaro.org \
--cc=peter.maydell@linaro.org \
--cc=qemu-arm@nongnu.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).