* [PATCH v3 0/9] target/arm: Implement SVE2 Crypto Extensions
@ 2020-04-30 2:02 Richard Henderson
2020-04-30 2:02 ` [PATCH v3 1/9] target/arm: Convert aes and sm4 to gvec helpers Richard Henderson
` (8 more replies)
0 siblings, 9 replies; 10+ messages in thread
From: Richard Henderson @ 2020-04-30 2:02 UTC (permalink / raw)
To: qemu-devel; +Cc: steplong
Stephen, what I was looking for when I talked about modifying
the existing helpers is patch 1, which simplifies the final 3
patches for the new SVE2 insns.
In the process I found that the existing implementation of the
AdvSIMD insns is buggy wrt SVE. We need to clear the bits in
the Zreg destination beyond the first 128 bits.
I could have done this via clear_vec_high in translate-a64.c,
but since we already have a function call, we're better off
doing the clearing out of line. This means adding a desc
parameter so that we know the total vector length.
r~
Based-on: https://github.com/rth7680/qemu/commit/8c3a91e4e487ef840193a489e5457165530666fc
Richard Henderson (9):
target/arm: Convert aes and sm4 to gvec helpers
target/arm: Convert rax1 to gvec helpers
target/arm: Convert sha512 and sm3 to gvec helpers
target/arm: Convert sha1 and sha256 to gvec helpers
target/arm: Split helper_crypto_sha1_3reg
target/arm: Split helper_crypto_sm3tt
target/arm: Implement SVE2 crypto unary operations
target/arm: Implement SVE2 crypto destructive binary operations
target/arm: Implement SVE2 crypto constructive binary operations
target/arm/cpu.h | 10 ++
target/arm/helper.h | 45 ++++---
target/arm/translate-a64.h | 2 +
target/arm/vec_internal.h | 10 ++
target/arm/sve.decode | 17 +++
target/arm/crypto_helper.c | 267 ++++++++++++++++++++++++++-----------
target/arm/translate-a64.c | 211 +++++++++++++----------------
target/arm/translate-sve.c | 79 +++++++++++
target/arm/translate.c | 125 +++++++++--------
target/arm/vec_helper.c | 10 --
10 files changed, 491 insertions(+), 285 deletions(-)
--
2.20.1
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v3 1/9] target/arm: Convert aes and sm4 to gvec helpers
2020-04-30 2:02 [PATCH v3 0/9] target/arm: Implement SVE2 Crypto Extensions Richard Henderson
@ 2020-04-30 2:02 ` Richard Henderson
2020-04-30 2:02 ` [PATCH v3 2/9] target/arm: Convert rax1 " Richard Henderson
` (7 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Richard Henderson @ 2020-04-30 2:02 UTC (permalink / raw)
To: qemu-devel; +Cc: steplong
With this conversion, we will be able to use the same helpers
with sve. This also fixes a bug in which we failed to clear
the high bits of the SVE register after an AdvSIMD operation.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper.h | 6 ++--
target/arm/vec_internal.h | 10 ++++++
target/arm/crypto_helper.c | 72 +++++++++++++++++++++++++++-----------
target/arm/translate-a64.c | 56 ++++++++++++++++++-----------
target/arm/translate.c | 27 +++++++-------
target/arm/vec_helper.c | 10 ------
6 files changed, 115 insertions(+), 66 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 032d5cdfbd..6623b6689a 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -513,7 +513,7 @@ DEF_HELPER_FLAGS_2(neon_qzip8, TCG_CALL_NO_RWG, void, ptr, ptr)
DEF_HELPER_FLAGS_2(neon_qzip16, TCG_CALL_NO_RWG, void, ptr, ptr)
DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr)
-DEF_HELPER_FLAGS_3(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(crypto_sha1_3reg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -534,8 +534,8 @@ DEF_HELPER_FLAGS_5(crypto_sm3tt, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32)
DEF_HELPER_FLAGS_3(crypto_sm3partw1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
DEF_HELPER_FLAGS_3(crypto_sm3partw2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
-DEF_HELPER_FLAGS_2(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr)
-DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
diff --git a/target/arm/vec_internal.h b/target/arm/vec_internal.h
index ac365123c4..7eaffeb060 100644
--- a/target/arm/vec_internal.h
+++ b/target/arm/vec_internal.h
@@ -20,6 +20,16 @@
#ifndef TARGET_ARM_VEC_INTERNALS_H
#define TARGET_ARM_VEC_INTERNALS_H
+static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
+{
+ uint64_t *d = vd + opr_sz;
+ uintptr_t i;
+
+ for (i = opr_sz; i < max_sz; i += 8) {
+ *d++ = 0;
+ }
+}
+
static inline int32_t do_sqrshl_bhs(int32_t src, int32_t shift, int bits,
bool round, uint32_t *sat)
{
diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c
index f800266727..6bd5a3d2d0 100644
--- a/target/arm/crypto_helper.c
+++ b/target/arm/crypto_helper.c
@@ -13,7 +13,9 @@
#include "cpu.h"
#include "exec/helper-proto.h"
+#include "tcg/tcg-gvec-desc.h"
#include "crypto/aes.h"
+#include "vec_internal.h"
union CRYPTO_STATE {
uint8_t bytes[16];
@@ -29,18 +31,15 @@ union CRYPTO_STATE {
#define CR_ST_WORD(state, i) (state.words[i])
#endif
-void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt)
+static void do_crypto_aese(uint64_t *rd, uint64_t *rn,
+ uint64_t *rm, bool decrypt)
{
static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox };
static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts };
- uint64_t *rd = vd;
- uint64_t *rm = vm;
union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
- union CRYPTO_STATE st = { .l = { rd[0], rd[1] } };
+ union CRYPTO_STATE st = { .l = { rn[0], rn[1] } };
int i;
- assert(decrypt < 2);
-
/* xor state vector with round key */
rk.l[0] ^= st.l[0];
rk.l[1] ^= st.l[1];
@@ -54,7 +53,18 @@ void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt)
rd[1] = st.l[1];
}
-void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt)
+void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ bool decrypt = simd_data(desc);
+
+ for (i = 0; i < opr_sz; i += 16) {
+ do_crypto_aese(vd + i, vn + i, vm + i, decrypt);
+ }
+ clear_tail(vd, opr_sz, simd_maxsz(desc));
+}
+
+static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt)
{
static uint32_t const mc[][256] = { {
/* MixColumns lookup table */
@@ -190,13 +200,9 @@ void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt)
0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
} };
- uint64_t *rd = vd;
- uint64_t *rm = vm;
union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
int i;
- assert(decrypt < 2);
-
for (i = 0; i < 16; i += 4) {
CR_ST_WORD(st, i >> 2) =
mc[decrypt][CR_ST_BYTE(st, i)] ^
@@ -209,6 +215,17 @@ void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt)
rd[1] = st.l[1];
}
+void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ bool decrypt = simd_data(desc);
+
+ for (i = 0; i < opr_sz; i += 16) {
+ do_crypto_aesmc(vd + i, vm + i, decrypt);
+ }
+ clear_tail(vd, opr_sz, simd_maxsz(desc));
+}
+
/*
* SHA-1 logical functions
*/
@@ -638,12 +655,10 @@ static uint8_t const sm4_sbox[] = {
0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
};
-void HELPER(crypto_sm4e)(void *vd, void *vn)
+static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
{
- uint64_t *rd = vd;
- uint64_t *rn = vn;
- union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
- union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
+ union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
+ union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
uint32_t t, i;
for (i = 0; i < 4; i++) {
@@ -665,11 +680,18 @@ void HELPER(crypto_sm4e)(void *vd, void *vn)
rd[1] = d.l[1];
}
-void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm)
+void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+
+ for (i = 0; i < opr_sz; i += 16) {
+ do_crypto_sm4e(vd + i, vn + i, vm + i);
+ }
+ clear_tail(vd, opr_sz, simd_maxsz(desc));
+}
+
+static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
{
- uint64_t *rd = vd;
- uint64_t *rn = vn;
- uint64_t *rm = vm;
union CRYPTO_STATE d;
union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
@@ -693,3 +715,13 @@ void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm)
rd[0] = d.l[0];
rd[1] = d.l[1];
}
+
+void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+
+ for (i = 0; i < opr_sz; i += 16) {
+ do_crypto_sm4ekey(vd + i, vn + i, vm + i);
+ }
+ clear_tail(vd, opr_sz, simd_maxsz(desc));
+}
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 632c1c34ed..b2adf3a39e 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -682,6 +682,15 @@ static void gen_gvec_op2(DisasContext *s, bool is_q, int rd,
is_q ? 16 : 8, vec_full_reg_size(s), gvec_op);
}
+/* Expand a 2-operand operation using an out-of-line helper. */
+static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
+ int rn, int data, gen_helper_gvec_2 *fn)
+{
+ tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
+}
+
/* Expand a 3-operand AdvSIMD vector operation using an op descriptor. */
static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
int rn, int rm, const GVecGen3 *gvec_op)
@@ -14455,9 +14464,8 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn)
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
int decrypt;
- TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
- TCGv_i32 tcg_decrypt;
- CryptoThreeOpIntFn *genfn;
+ gen_helper_gvec_2 *genfn2 = NULL;
+ gen_helper_gvec_3 *genfn3 = NULL;
if (!dc_isar_feature(aa64_aes, s) || size != 0) {
unallocated_encoding(s);
@@ -14467,19 +14475,19 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn)
switch (opcode) {
case 0x4: /* AESE */
decrypt = 0;
- genfn = gen_helper_crypto_aese;
+ genfn3 = gen_helper_crypto_aese;
break;
case 0x6: /* AESMC */
decrypt = 0;
- genfn = gen_helper_crypto_aesmc;
+ genfn2 = gen_helper_crypto_aesmc;
break;
case 0x5: /* AESD */
decrypt = 1;
- genfn = gen_helper_crypto_aese;
+ genfn3 = gen_helper_crypto_aese;
break;
case 0x7: /* AESIMC */
decrypt = 1;
- genfn = gen_helper_crypto_aesmc;
+ genfn2 = gen_helper_crypto_aesmc;
break;
default:
unallocated_encoding(s);
@@ -14489,16 +14497,11 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn)
if (!fp_access_check(s)) {
return;
}
-
- tcg_rd_ptr = vec_full_reg_ptr(s, rd);
- tcg_rn_ptr = vec_full_reg_ptr(s, rn);
- tcg_decrypt = tcg_const_i32(decrypt);
-
- genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt);
-
- tcg_temp_free_ptr(tcg_rd_ptr);
- tcg_temp_free_ptr(tcg_rn_ptr);
- tcg_temp_free_i32(tcg_decrypt);
+ if (genfn2) {
+ gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2);
+ } else {
+ gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3);
+ }
}
/* Crypto three-reg SHA
@@ -14641,13 +14644,15 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
*/
static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
{
+ static const GVecGen3 sm4ekey_op = { .fno = gen_helper_crypto_sm4ekey };
int opcode = extract32(insn, 10, 2);
int o = extract32(insn, 14, 1);
int rm = extract32(insn, 16, 5);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
bool feature;
- CryptoThreeOpFn *genfn;
+ CryptoThreeOpFn *genfn = NULL;
+ const GVecGen3 *gvecop = NULL;
if (o == 0) {
switch (opcode) {
@@ -14682,7 +14687,7 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
break;
case 2: /* SM4EKEY */
feature = dc_isar_feature(aa64_sm4, s);
- genfn = gen_helper_crypto_sm4ekey;
+ gvecop = &sm4ekey_op;
break;
default:
unallocated_encoding(s);
@@ -14699,6 +14704,11 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
return;
}
+ if (gvecop) {
+ gen_gvec_op3(s, true, rd, rn, rm, gvecop);
+ return;
+ }
+
if (genfn) {
TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
@@ -14751,6 +14761,7 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
bool feature;
CryptoTwoOpFn *genfn;
+ gen_helper_gvec_3 *gvecfn = NULL;
switch (opcode) {
case 0: /* SHA512SU0 */
@@ -14759,7 +14770,7 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
break;
case 1: /* SM4E */
feature = dc_isar_feature(aa64_sm4, s);
- genfn = gen_helper_crypto_sm4e;
+ gvecfn = gen_helper_crypto_sm4e;
break;
default:
unallocated_encoding(s);
@@ -14775,6 +14786,11 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
return;
}
+ if (gvecfn) {
+ gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gvecfn);
+ return;
+ }
+
tcg_rd_ptr = vec_full_reg_ptr(s, rd);
tcg_rn_ptr = vec_full_reg_ptr(s, rn);
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 94a81b8323..caa98ed309 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -6925,22 +6925,23 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
return 1;
}
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rm);
-
- /* Bit 6 is the lowest opcode bit; it distinguishes between
- * encryption (AESE/AESMC) and decryption (AESD/AESIMC)
- */
- tmp3 = tcg_const_i32(extract32(insn, 6, 1));
-
+ /*
+ * Bit 6 is the lowest opcode bit; it distinguishes
+ * between encryption (AESE/AESMC) and decryption
+ * (AESD/AESIMC).
+ */
if (op == NEON_2RM_AESE) {
- gen_helper_crypto_aese(ptr1, ptr2, tmp3);
+ tcg_gen_gvec_3_ool(vfp_reg_offset(true, rd),
+ vfp_reg_offset(true, rd),
+ vfp_reg_offset(true, rm),
+ 16, 16, extract32(insn, 6, 1),
+ gen_helper_crypto_aese);
} else {
- gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
+ tcg_gen_gvec_2_ool(vfp_reg_offset(true, rd),
+ vfp_reg_offset(true, rm),
+ 16, 16, extract32(insn, 6, 1),
+ gen_helper_crypto_aesmc);
}
- tcg_temp_free_ptr(ptr1);
- tcg_temp_free_ptr(ptr2);
- tcg_temp_free_i32(tmp3);
break;
case NEON_2RM_SHA1H:
if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 2339be124c..25dca56090 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -38,16 +38,6 @@
#define H4(x) (x)
#endif
-static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
-{
- uint64_t *d = vd + opr_sz;
- uintptr_t i;
-
- for (i = opr_sz; i < max_sz; i += 8) {
- *d++ = 0;
- }
-}
-
/* Signed saturating rounding doubling multiply-accumulate high half, 8-bit */
void HELPER(sve2_sqrdmlah_b)(void *vd, void *vn, void *vm,
void *va, uint32_t desc)
--
2.20.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v3 2/9] target/arm: Convert rax1 to gvec helpers
2020-04-30 2:02 [PATCH v3 0/9] target/arm: Implement SVE2 Crypto Extensions Richard Henderson
2020-04-30 2:02 ` [PATCH v3 1/9] target/arm: Convert aes and sm4 to gvec helpers Richard Henderson
@ 2020-04-30 2:02 ` Richard Henderson
2020-04-30 2:02 ` [PATCH v3 3/9] target/arm: Convert sha512 and sm3 " Richard Henderson
` (6 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Richard Henderson @ 2020-04-30 2:02 UTC (permalink / raw)
To: qemu-devel; +Cc: steplong
With this conversion, we will be able to use the same helpers
with sve. This also fixes a bug in which we failed to clear
the high bits of the SVE register after an AdvSIMD operation.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper.h | 2 ++
target/arm/translate-a64.h | 2 ++
target/arm/crypto_helper.c | 11 ++++++++
target/arm/translate-a64.c | 53 ++++++++++++++++++--------------------
4 files changed, 40 insertions(+), 28 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 6623b6689a..96cf4464be 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -537,6 +537,8 @@ DEF_HELPER_FLAGS_3(crypto_sm3partw2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_rax1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
index 84c40377bd..f2250a8dd1 100644
--- a/target/arm/translate-a64.h
+++ b/target/arm/translate-a64.h
@@ -141,4 +141,6 @@ void arm_gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, int64_t shift,
uint32_t opr_sz, uint32_t max_sz);
+extern const GVecGen3 rax1_op;
+
#endif /* TARGET_ARM_TRANSLATE_A64_H */
diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c
index 6bd5a3d2d0..372d8350e4 100644
--- a/target/arm/crypto_helper.c
+++ b/target/arm/crypto_helper.c
@@ -725,3 +725,14 @@ void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
}
clear_tail(vd, opr_sz, simd_maxsz(desc));
}
+
+void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 8; ++i) {
+ d[i] = n[i] ^ rol64(m[i], 1);
+ }
+ clear_tail(vd, opr_sz, simd_maxsz(desc));
+}
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index b2adf3a39e..2eb4315b6d 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -14636,6 +14636,29 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
tcg_temp_free_ptr(tcg_rn_ptr);
}
+static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ tcg_gen_rotli_i64(d, m, 1);
+ tcg_gen_xor_i64(d, d, n);
+}
+
+static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
+{
+ tcg_gen_rotli_vec(vece, d, m, 1);
+ tcg_gen_xor_vec(vece, d, d, n);
+}
+
+static const TCGOpcode rax1_opc[] = { INDEX_op_rotli_vec, 0 };
+
+const GVecGen3 rax1_op =
+{
+ .fni8 = gen_rax1_i64,
+ .fniv = gen_rax1_vec,
+ .opt_opc = rax1_opc,
+ .fno = gen_helper_crypto_rax1,
+ .vece = MO_64,
+};
+
/* Crypto three-reg SHA512
* 31 21 20 16 15 14 13 12 11 10 9 5 4 0
* +-----------------------+------+---+---+-----+--------+------+------+
@@ -14670,7 +14693,7 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
break;
case 3: /* RAX1 */
feature = dc_isar_feature(aa64_sha3, s);
- genfn = NULL;
+ gvecop = &rax1_op;
break;
default:
g_assert_not_reached();
@@ -14706,10 +14729,7 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
if (gvecop) {
gen_gvec_op3(s, true, rd, rn, rm, gvecop);
- return;
- }
-
- if (genfn) {
+ } else {
TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
tcg_rd_ptr = vec_full_reg_ptr(s, rd);
@@ -14721,29 +14741,6 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
tcg_temp_free_ptr(tcg_rd_ptr);
tcg_temp_free_ptr(tcg_rn_ptr);
tcg_temp_free_ptr(tcg_rm_ptr);
- } else {
- TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
- int pass;
-
- tcg_op1 = tcg_temp_new_i64();
- tcg_op2 = tcg_temp_new_i64();
- tcg_res[0] = tcg_temp_new_i64();
- tcg_res[1] = tcg_temp_new_i64();
-
- for (pass = 0; pass < 2; pass++) {
- read_vec_element(s, tcg_op1, rn, pass, MO_64);
- read_vec_element(s, tcg_op2, rm, pass, MO_64);
-
- tcg_gen_rotli_i64(tcg_res[pass], tcg_op2, 1);
- tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
- }
- write_vec_element(s, tcg_res[0], rd, 0, MO_64);
- write_vec_element(s, tcg_res[1], rd, 1, MO_64);
-
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2);
- tcg_temp_free_i64(tcg_res[0]);
- tcg_temp_free_i64(tcg_res[1]);
}
}
--
2.20.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v3 3/9] target/arm: Convert sha512 and sm3 to gvec helpers
2020-04-30 2:02 [PATCH v3 0/9] target/arm: Implement SVE2 Crypto Extensions Richard Henderson
2020-04-30 2:02 ` [PATCH v3 1/9] target/arm: Convert aes and sm4 to gvec helpers Richard Henderson
2020-04-30 2:02 ` [PATCH v3 2/9] target/arm: Convert rax1 " Richard Henderson
@ 2020-04-30 2:02 ` Richard Henderson
2020-04-30 2:02 ` [PATCH v3 4/9] target/arm: Convert sha1 and sha256 " Richard Henderson
` (5 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Richard Henderson @ 2020-04-30 2:02 UTC (permalink / raw)
To: qemu-devel; +Cc: steplong
Do not yet convert the helpers to loop over opr_sz, but the
descriptor allows the vector tail to be cleared. Which fixes
an existing bug.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper.h | 15 +++++----
target/arm/crypto_helper.c | 37 +++++++++++++++++++----
target/arm/translate-a64.c | 62 +++++++++++++++-----------------------
3 files changed, 64 insertions(+), 50 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 96cf4464be..d564747808 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -525,14 +525,17 @@ DEF_HELPER_FLAGS_3(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
DEF_HELPER_FLAGS_2(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr)
DEF_HELPER_FLAGS_3(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
-DEF_HELPER_FLAGS_3(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
-DEF_HELPER_FLAGS_3(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
-DEF_HELPER_FLAGS_2(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr)
-DEF_HELPER_FLAGS_3(crypto_sha512su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha512su1, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(crypto_sm3tt, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32)
-DEF_HELPER_FLAGS_3(crypto_sm3partw1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
-DEF_HELPER_FLAGS_3(crypto_sm3partw2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(crypto_sm3partw1, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sm3partw2, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c
index 372d8350e4..637e4c00bb 100644
--- a/target/arm/crypto_helper.c
+++ b/target/arm/crypto_helper.c
@@ -31,6 +31,19 @@ union CRYPTO_STATE {
#define CR_ST_WORD(state, i) (state.words[i])
#endif
+/*
+ * The caller has not been converted to full gvec, and so only
+ * modifies the low 16 bytes of the vector register.
+ */
+static void clear_tail_16(void *vd, uint32_t desc)
+{
+ int opr_sz = simd_oprsz(desc);
+ int max_sz = simd_maxsz(desc);
+
+ assert(opr_sz == 16);
+ clear_tail(vd, opr_sz, max_sz);
+}
+
static void do_crypto_aese(uint64_t *rd, uint64_t *rn,
uint64_t *rm, bool decrypt)
{
@@ -470,7 +483,7 @@ static uint64_t s1_512(uint64_t x)
return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6);
}
-void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm)
+void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
{
uint64_t *rd = vd;
uint64_t *rn = vn;
@@ -483,9 +496,11 @@ void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm)
rd[0] = d0;
rd[1] = d1;
+
+ clear_tail_16(vd, desc);
}
-void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm)
+void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
{
uint64_t *rd = vd;
uint64_t *rn = vn;
@@ -498,9 +513,11 @@ void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm)
rd[0] = d0;
rd[1] = d1;
+
+ clear_tail_16(vd, desc);
}
-void HELPER(crypto_sha512su0)(void *vd, void *vn)
+void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
{
uint64_t *rd = vd;
uint64_t *rn = vn;
@@ -512,9 +529,11 @@ void HELPER(crypto_sha512su0)(void *vd, void *vn)
rd[0] = d0;
rd[1] = d1;
+
+ clear_tail_16(vd, desc);
}
-void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm)
+void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
{
uint64_t *rd = vd;
uint64_t *rn = vn;
@@ -522,9 +541,11 @@ void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm)
rd[0] += s1_512(rn[0]) + rm[0];
rd[1] += s1_512(rn[1]) + rm[1];
+
+ clear_tail_16(vd, desc);
}
-void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm)
+void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
{
uint64_t *rd = vd;
uint64_t *rn = vn;
@@ -548,9 +569,11 @@ void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm)
rd[0] = d.l[0];
rd[1] = d.l[1];
+
+ clear_tail_16(vd, desc);
}
-void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm)
+void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
{
uint64_t *rd = vd;
uint64_t *rn = vn;
@@ -568,6 +591,8 @@ void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm)
rd[0] = d.l[0];
rd[1] = d.l[1];
+
+ clear_tail_16(vd, desc);
}
void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2,
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 2eb4315b6d..c737a409d0 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -14668,28 +14668,36 @@ const GVecGen3 rax1_op =
static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
{
static const GVecGen3 sm4ekey_op = { .fno = gen_helper_crypto_sm4ekey };
+ static const GVecGen3 sha512h_op = { .fno = gen_helper_crypto_sha512h };
+ static const GVecGen3 sha512h2_op
+ = { .fno = gen_helper_crypto_sha512h2 };
+ static const GVecGen3 sha512su1_op
+ = { .fno = gen_helper_crypto_sha512su1 };
+ static const GVecGen3 sm3partw1_op
+ = { .fno = gen_helper_crypto_sm3partw1 };
+ static const GVecGen3 sm3partw2_op
+ = { .fno = gen_helper_crypto_sm3partw2 };
int opcode = extract32(insn, 10, 2);
int o = extract32(insn, 14, 1);
int rm = extract32(insn, 16, 5);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
bool feature;
- CryptoThreeOpFn *genfn = NULL;
const GVecGen3 *gvecop = NULL;
if (o == 0) {
switch (opcode) {
case 0: /* SHA512H */
feature = dc_isar_feature(aa64_sha512, s);
- genfn = gen_helper_crypto_sha512h;
+ gvecop = &sha512h_op;
break;
case 1: /* SHA512H2 */
feature = dc_isar_feature(aa64_sha512, s);
- genfn = gen_helper_crypto_sha512h2;
+ gvecop = &sha512h2_op;
break;
case 2: /* SHA512SU1 */
feature = dc_isar_feature(aa64_sha512, s);
- genfn = gen_helper_crypto_sha512su1;
+ gvecop = &sha512su1_op;
break;
case 3: /* RAX1 */
feature = dc_isar_feature(aa64_sha3, s);
@@ -14702,11 +14710,11 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
switch (opcode) {
case 0: /* SM3PARTW1 */
feature = dc_isar_feature(aa64_sm3, s);
- genfn = gen_helper_crypto_sm3partw1;
+ gvecop = &sm3partw1_op;
break;
case 1: /* SM3PARTW2 */
feature = dc_isar_feature(aa64_sm3, s);
- genfn = gen_helper_crypto_sm3partw2;
+ gvecop = &sm3partw2_op;
break;
case 2: /* SM4EKEY */
feature = dc_isar_feature(aa64_sm4, s);
@@ -14726,22 +14734,7 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
if (!fp_access_check(s)) {
return;
}
-
- if (gvecop) {
- gen_gvec_op3(s, true, rd, rn, rm, gvecop);
- } else {
- TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
-
- tcg_rd_ptr = vec_full_reg_ptr(s, rd);
- tcg_rn_ptr = vec_full_reg_ptr(s, rn);
- tcg_rm_ptr = vec_full_reg_ptr(s, rm);
-
- genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
-
- tcg_temp_free_ptr(tcg_rd_ptr);
- tcg_temp_free_ptr(tcg_rn_ptr);
- tcg_temp_free_ptr(tcg_rm_ptr);
- }
+ gen_gvec_op3(s, true, rd, rn, rm, gvecop);
}
/* Crypto two-reg SHA512
@@ -14755,19 +14748,14 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
int opcode = extract32(insn, 10, 2);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
- TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
bool feature;
- CryptoTwoOpFn *genfn;
- gen_helper_gvec_3 *gvecfn = NULL;
switch (opcode) {
case 0: /* SHA512SU0 */
feature = dc_isar_feature(aa64_sha512, s);
- genfn = gen_helper_crypto_sha512su0;
break;
case 1: /* SM4E */
feature = dc_isar_feature(aa64_sm4, s);
- gvecfn = gen_helper_crypto_sm4e;
break;
default:
unallocated_encoding(s);
@@ -14783,18 +14771,16 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
return;
}
- if (gvecfn) {
- gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gvecfn);
- return;
+ switch (opcode) {
+ case 0: /* SHA512SU0 */
+ gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
+ break;
+ case 1: /* SM4E */
+ gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
+ break;
+ default:
+ g_assert_not_reached();
}
-
- tcg_rd_ptr = vec_full_reg_ptr(s, rd);
- tcg_rn_ptr = vec_full_reg_ptr(s, rn);
-
- genfn(tcg_rd_ptr, tcg_rn_ptr);
-
- tcg_temp_free_ptr(tcg_rd_ptr);
- tcg_temp_free_ptr(tcg_rn_ptr);
}
/* Crypto four-register
--
2.20.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v3 4/9] target/arm: Convert sha1 and sha256 to gvec helpers
2020-04-30 2:02 [PATCH v3 0/9] target/arm: Implement SVE2 Crypto Extensions Richard Henderson
` (2 preceding siblings ...)
2020-04-30 2:02 ` [PATCH v3 3/9] target/arm: Convert sha512 and sm3 " Richard Henderson
@ 2020-04-30 2:02 ` Richard Henderson
2020-04-30 2:02 ` [PATCH v3 5/9] target/arm: Split helper_crypto_sha1_3reg Richard Henderson
` (4 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Richard Henderson @ 2020-04-30 2:02 UTC (permalink / raw)
To: qemu-devel; +Cc: steplong
Do not yet convert the helpers to loop over opr_sz, but the
descriptor allows the vector tail to be cleared. Which fixes
an existing bug.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper.h | 12 +++++------
target/arm/crypto_helper.c | 24 +++++++++++++++------
target/arm/translate-a64.c | 34 +++++++++++------------------
target/arm/translate.c | 44 +++++++++++++++-----------------------
4 files changed, 53 insertions(+), 61 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index d564747808..07466ddc6c 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -517,13 +517,13 @@ DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(crypto_sha1_3reg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_2(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr)
-DEF_HELPER_FLAGS_2(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr)
+DEF_HELPER_FLAGS_3(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
-DEF_HELPER_FLAGS_3(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
-DEF_HELPER_FLAGS_2(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr)
-DEF_HELPER_FLAGS_3(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c
index 637e4c00bb..7124745c32 100644
--- a/target/arm/crypto_helper.c
+++ b/target/arm/crypto_helper.c
@@ -303,7 +303,7 @@ void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op)
rd[1] = d.l[1];
}
-void HELPER(crypto_sha1h)(void *vd, void *vm)
+void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
{
uint64_t *rd = vd;
uint64_t *rm = vm;
@@ -314,9 +314,11 @@ void HELPER(crypto_sha1h)(void *vd, void *vm)
rd[0] = m.l[0];
rd[1] = m.l[1];
+
+ clear_tail_16(vd, desc);
}
-void HELPER(crypto_sha1su1)(void *vd, void *vm)
+void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
{
uint64_t *rd = vd;
uint64_t *rm = vm;
@@ -330,6 +332,8 @@ void HELPER(crypto_sha1su1)(void *vd, void *vm)
rd[0] = d.l[0];
rd[1] = d.l[1];
+
+ clear_tail_16(vd, desc);
}
/*
@@ -357,7 +361,7 @@ static uint32_t s1(uint32_t x)
return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
}
-void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm)
+void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
{
uint64_t *rd = vd;
uint64_t *rn = vn;
@@ -388,9 +392,11 @@ void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm)
rd[0] = d.l[0];
rd[1] = d.l[1];
+
+ clear_tail_16(vd, desc);
}
-void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm)
+void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
{
uint64_t *rd = vd;
uint64_t *rn = vn;
@@ -413,9 +419,11 @@ void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm)
rd[0] = d.l[0];
rd[1] = d.l[1];
+
+ clear_tail_16(vd, desc);
}
-void HELPER(crypto_sha256su0)(void *vd, void *vm)
+void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
{
uint64_t *rd = vd;
uint64_t *rm = vm;
@@ -429,9 +437,11 @@ void HELPER(crypto_sha256su0)(void *vd, void *vm)
rd[0] = d.l[0];
rd[1] = d.l[1];
+
+ clear_tail_16(vd, desc);
}
-void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm)
+void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
{
uint64_t *rd = vd;
uint64_t *rn = vn;
@@ -447,6 +457,8 @@ void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm)
rd[0] = d.l[0];
rd[1] = d.l[1];
+
+ clear_tail_16(vd, desc);
}
/*
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index c737a409d0..48f71e01e4 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -14517,8 +14517,7 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
int rm = extract32(insn, 16, 5);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
- CryptoThreeOpFn *genfn;
- TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
+ gen_helper_gvec_3 *genfn;
bool feature;
if (size != 0) {
@@ -14560,23 +14559,22 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
return;
}
- tcg_rd_ptr = vec_full_reg_ptr(s, rd);
- tcg_rn_ptr = vec_full_reg_ptr(s, rn);
- tcg_rm_ptr = vec_full_reg_ptr(s, rm);
-
if (genfn) {
- genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
+ gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
} else {
TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
+ TCGv_ptr tcg_rd_ptr = vec_full_reg_ptr(s, rd);
+ TCGv_ptr tcg_rn_ptr = vec_full_reg_ptr(s, rn);
+ TCGv_ptr tcg_rm_ptr = vec_full_reg_ptr(s, rm);
gen_helper_crypto_sha1_3reg(tcg_rd_ptr, tcg_rn_ptr,
tcg_rm_ptr, tcg_opcode);
- tcg_temp_free_i32(tcg_opcode);
- }
- tcg_temp_free_ptr(tcg_rd_ptr);
- tcg_temp_free_ptr(tcg_rn_ptr);
- tcg_temp_free_ptr(tcg_rm_ptr);
+ tcg_temp_free_i32(tcg_opcode);
+ tcg_temp_free_ptr(tcg_rd_ptr);
+ tcg_temp_free_ptr(tcg_rn_ptr);
+ tcg_temp_free_ptr(tcg_rm_ptr);
+ }
}
/* Crypto two-reg SHA
@@ -14591,9 +14589,8 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
int opcode = extract32(insn, 12, 5);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
- CryptoTwoOpFn *genfn;
+ gen_helper_gvec_2 *genfn;
bool feature;
- TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
if (size != 0) {
unallocated_encoding(s);
@@ -14626,14 +14623,7 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
if (!fp_access_check(s)) {
return;
}
-
- tcg_rd_ptr = vec_full_reg_ptr(s, rd);
- tcg_rn_ptr = vec_full_reg_ptr(s, rn);
-
- genfn(tcg_rd_ptr, tcg_rn_ptr);
-
- tcg_temp_free_ptr(tcg_rd_ptr);
- tcg_temp_free_ptr(tcg_rn_ptr);
+ gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
}
static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
diff --git a/target/arm/translate.c b/target/arm/translate.c
index caa98ed309..517048e982 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -5364,28 +5364,30 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
tmp4 = tcg_const_i32(size);
gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
tcg_temp_free_i32(tmp4);
+ tcg_temp_free_ptr(ptr1);
+ tcg_temp_free_ptr(ptr2);
+ tcg_temp_free_ptr(ptr3);
} else { /* SHA-256 */
- if (!dc_isar_feature(aa32_sha2, s) || size == 3) {
+ gen_helper_gvec_3 *fn;
+
+ if (!dc_isar_feature(aa32_sha2, s)) {
return 1;
}
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rn);
- ptr3 = vfp_reg_ptr(true, rm);
switch (size) {
case 0:
- gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
+ fn = gen_helper_crypto_sha256h;
break;
case 1:
- gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
+ fn = gen_helper_crypto_sha256h2;
break;
case 2:
- gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
+ fn = gen_helper_crypto_sha256su1;
break;
+ default:
+ return 1;
}
+ tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16, 0, fn);
}
- tcg_temp_free_ptr(ptr1);
- tcg_temp_free_ptr(ptr2);
- tcg_temp_free_ptr(ptr3);
return 0;
case NEON_3R_VPADD_VQRDMLAH:
@@ -6947,13 +6949,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
return 1;
}
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rm);
-
- gen_helper_crypto_sha1h(ptr1, ptr2);
-
- tcg_temp_free_ptr(ptr1);
- tcg_temp_free_ptr(ptr2);
+ tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, 16, 16, 0,
+ gen_helper_crypto_sha1h);
break;
case NEON_2RM_SHA1SU1:
if ((rm | rd) & 1) {
@@ -6967,17 +6964,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
} else if (!dc_isar_feature(aa32_sha1, s)) {
return 1;
}
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rm);
- if (q) {
- gen_helper_crypto_sha256su0(ptr1, ptr2);
- } else {
- gen_helper_crypto_sha1su1(ptr1, ptr2);
- }
- tcg_temp_free_ptr(ptr1);
- tcg_temp_free_ptr(ptr2);
+ tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, 16, 16, 0,
+ q ? gen_helper_crypto_sha256su0
+ : gen_helper_crypto_sha1su1);
break;
-
case NEON_2RM_VMVN:
tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
break;
--
2.20.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v3 5/9] target/arm: Split helper_crypto_sha1_3reg
2020-04-30 2:02 [PATCH v3 0/9] target/arm: Implement SVE2 Crypto Extensions Richard Henderson
` (3 preceding siblings ...)
2020-04-30 2:02 ` [PATCH v3 4/9] target/arm: Convert sha1 and sha256 " Richard Henderson
@ 2020-04-30 2:02 ` Richard Henderson
2020-04-30 2:03 ` [PATCH v3 6/9] target/arm: Split helper_crypto_sm3tt Richard Henderson
` (3 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Richard Henderson @ 2020-04-30 2:02 UTC (permalink / raw)
To: qemu-devel; +Cc: steplong
Rather than passing an opcode to a helper, fully decode the
operation at translate time. Use clear_tail_16 to zap the
balance of the SVE register with the AdvSIMD write.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper.h | 5 +-
target/arm/crypto_helper.c | 99 ++++++++++++++++++++++++--------------
target/arm/translate-a64.c | 29 +++++------
target/arm/translate.c | 70 +++++++++++++++------------
4 files changed, 116 insertions(+), 87 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 07466ddc6c..24aca28a05 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -516,7 +516,10 @@ DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr)
DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(crypto_sha1_3reg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha1su0, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha1c, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha1p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha1m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c
index 7124745c32..636683d0f1 100644
--- a/target/arm/crypto_helper.c
+++ b/target/arm/crypto_helper.c
@@ -24,11 +24,11 @@ union CRYPTO_STATE {
};
#ifdef HOST_WORDS_BIGENDIAN
-#define CR_ST_BYTE(state, i) (state.bytes[(15 - (i)) ^ 8])
-#define CR_ST_WORD(state, i) (state.words[(3 - (i)) ^ 2])
+#define CR_ST_BYTE(state, i) ((state).bytes[(15 - (i)) ^ 8])
+#define CR_ST_WORD(state, i) ((state).words[(3 - (i)) ^ 2])
#else
-#define CR_ST_BYTE(state, i) (state.bytes[i])
-#define CR_ST_WORD(state, i) (state.words[i])
+#define CR_ST_BYTE(state, i) ((state).bytes[i])
+#define CR_ST_WORD(state, i) ((state).words[i])
#endif
/*
@@ -258,49 +258,74 @@ static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
return (x & y) | ((x | y) & z);
}
-void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op)
+void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint64_t d0, d1;
+
+ d0 = d[1] ^ d[0] ^ m[0];
+ d1 = n[0] ^ d[1] ^ m[1];
+ d[0] = d0;
+ d[1] = d1;
+
+ clear_tail_16(vd, desc);
+}
+
+static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
+ uint64_t *rm, uint32_t desc,
+ uint32_t (*fn)(union CRYPTO_STATE *d))
{
- uint64_t *rd = vd;
- uint64_t *rn = vn;
- uint64_t *rm = vm;
union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
+ int i;
- if (op == 3) { /* sha1su0 */
- d.l[0] ^= d.l[1] ^ m.l[0];
- d.l[1] ^= n.l[0] ^ m.l[1];
- } else {
- int i;
+ for (i = 0; i < 4; i++) {
+ uint32_t t = fn(&d);
- for (i = 0; i < 4; i++) {
- uint32_t t;
+ t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
+ + CR_ST_WORD(m, i);
- switch (op) {
- case 0: /* sha1c */
- t = cho(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
- break;
- case 1: /* sha1p */
- t = par(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
- break;
- case 2: /* sha1m */
- t = maj(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
- break;
- default:
- g_assert_not_reached();
- }
- t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
- + CR_ST_WORD(m, i);
-
- CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
- CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
- CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
- CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
- CR_ST_WORD(d, 0) = t;
- }
+ CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
+ CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
+ CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
+ CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
+ CR_ST_WORD(d, 0) = t;
}
rd[0] = d.l[0];
rd[1] = d.l[1];
+
+ clear_tail_16(rd, desc);
+}
+
+static uint32_t do_sha1c(union CRYPTO_STATE *d)
+{
+ return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
+}
+
+void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
+}
+
+static uint32_t do_sha1p(union CRYPTO_STATE *d)
+{
+ return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
+}
+
+void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
+}
+
+static uint32_t do_sha1m(union CRYPTO_STATE *d)
+{
+ return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
+}
+
+void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
}
void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 48f71e01e4..81ad287811 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -14527,10 +14527,19 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
switch (opcode) {
case 0: /* SHA1C */
+ genfn = gen_helper_crypto_sha1c;
+ feature = dc_isar_feature(aa64_sha1, s);
+ break;
case 1: /* SHA1P */
+ genfn = gen_helper_crypto_sha1p;
+ feature = dc_isar_feature(aa64_sha1, s);
+ break;
case 2: /* SHA1M */
+ genfn = gen_helper_crypto_sha1m;
+ feature = dc_isar_feature(aa64_sha1, s);
+ break;
case 3: /* SHA1SU0 */
- genfn = NULL;
+ genfn = gen_helper_crypto_sha1su0;
feature = dc_isar_feature(aa64_sha1, s);
break;
case 4: /* SHA256H */
@@ -14558,23 +14567,7 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
if (!fp_access_check(s)) {
return;
}
-
- if (genfn) {
- gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
- } else {
- TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
- TCGv_ptr tcg_rd_ptr = vec_full_reg_ptr(s, rd);
- TCGv_ptr tcg_rn_ptr = vec_full_reg_ptr(s, rn);
- TCGv_ptr tcg_rm_ptr = vec_full_reg_ptr(s, rm);
-
- gen_helper_crypto_sha1_3reg(tcg_rd_ptr, tcg_rn_ptr,
- tcg_rm_ptr, tcg_opcode);
-
- tcg_temp_free_i32(tcg_opcode);
- tcg_temp_free_ptr(tcg_rd_ptr);
- tcg_temp_free_ptr(tcg_rn_ptr);
- tcg_temp_free_ptr(tcg_rm_ptr);
- }
+ gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
}
/* Crypto two-reg SHA
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 517048e982..34ea5e53e7 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -5305,7 +5305,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
int vec_size;
uint32_t imm;
TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
- TCGv_ptr ptr1, ptr2, ptr3;
+ TCGv_ptr ptr1;
TCGv_i64 tmp64;
/* FIXME: this access check should not take precedence over UNDEF
@@ -5353,38 +5353,46 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
*/
if (!q) {
return 1;
- }
- if (!u) { /* SHA-1 */
- if (!dc_isar_feature(aa32_sha1, s)) {
- return 1;
- }
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rn);
- ptr3 = vfp_reg_ptr(true, rm);
- tmp4 = tcg_const_i32(size);
- gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
- tcg_temp_free_i32(tmp4);
- tcg_temp_free_ptr(ptr1);
- tcg_temp_free_ptr(ptr2);
- tcg_temp_free_ptr(ptr3);
- } else { /* SHA-256 */
+ } else {
gen_helper_gvec_3 *fn;
- if (!dc_isar_feature(aa32_sha2, s)) {
- return 1;
- }
- switch (size) {
- case 0:
- fn = gen_helper_crypto_sha256h;
- break;
- case 1:
- fn = gen_helper_crypto_sha256h2;
- break;
- case 2:
- fn = gen_helper_crypto_sha256su1;
- break;
- default:
- return 1;
+ if (!u) { /* SHA-1 */
+ if (!dc_isar_feature(aa32_sha1, s)) {
+ return 1;
+ }
+ switch (size) {
+ case 0:
+ fn = gen_helper_crypto_sha1c;
+ break;
+ case 1:
+ fn = gen_helper_crypto_sha1p;
+ break;
+ case 2:
+ fn = gen_helper_crypto_sha1m;
+ break;
+ case 3:
+ fn = gen_helper_crypto_sha1su0;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ } else { /* SHA-256 */
+ if (!dc_isar_feature(aa32_sha2, s)) {
+ return 1;
+ }
+ switch (size) {
+ case 0:
+ fn = gen_helper_crypto_sha256h;
+ break;
+ case 1:
+ fn = gen_helper_crypto_sha256h2;
+ break;
+ case 2:
+ fn = gen_helper_crypto_sha256su1;
+ break;
+ default:
+ return 1;
+ }
}
tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16, 0, fn);
}
--
2.20.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v3 6/9] target/arm: Split helper_crypto_sm3tt
2020-04-30 2:02 [PATCH v3 0/9] target/arm: Implement SVE2 Crypto Extensions Richard Henderson
` (4 preceding siblings ...)
2020-04-30 2:02 ` [PATCH v3 5/9] target/arm: Split helper_crypto_sha1_3reg Richard Henderson
@ 2020-04-30 2:03 ` Richard Henderson
2020-04-30 2:03 ` [PATCH v3 7/9] target/arm: Implement SVE2 crypto unary operations Richard Henderson
` (2 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Richard Henderson @ 2020-04-30 2:03 UTC (permalink / raw)
To: qemu-devel; +Cc: steplong
Rather than passing an opcode to a helper, fully decode the
operation at translate time. Use clear_tail_16 to zap the
balance of the SVE register with the AdvSIMD write.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper.h | 5 ++++-
target/arm/crypto_helper.c | 24 ++++++++++++++++++------
target/arm/translate-a64.c | 21 +++++----------------
3 files changed, 27 insertions(+), 23 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 24aca28a05..e891f91e65 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -534,7 +534,10 @@ DEF_HELPER_FLAGS_3(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(crypto_sha512su1, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(crypto_sm3tt, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32)
+DEF_HELPER_FLAGS_4(crypto_sm3tt1a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sm3tt1b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sm3tt2a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sm3tt2b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(crypto_sm3partw1, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(crypto_sm3partw2, TCG_CALL_NO_RWG,
diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c
index 636683d0f1..c76806dc8d 100644
--- a/target/arm/crypto_helper.c
+++ b/target/arm/crypto_helper.c
@@ -632,15 +632,14 @@ void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
clear_tail_16(vd, desc);
}
-void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2,
- uint32_t opcode)
+static inline void QEMU_ALWAYS_INLINE
+crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm,
+ uint32_t desc, uint32_t opcode)
{
- uint64_t *rd = vd;
- uint64_t *rn = vn;
- uint64_t *rm = vm;
union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
+ uint32_t imm2 = simd_data(desc);
uint32_t t;
assert(imm2 < 4);
@@ -655,7 +654,7 @@ void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2,
/* SM3TT2B */
t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
} else {
- g_assert_not_reached();
+ qemu_build_not_reached();
}
t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
@@ -680,8 +679,21 @@ void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2,
rd[0] = d.l[0];
rd[1] = d.l[1];
+
+ clear_tail_16(rd, desc);
}
+#define DO_SM3TT(NAME, OPCODE) \
+ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+ { crypto_sm3tt(vd, vn, vm, desc, OPCODE); }
+
+DO_SM3TT(crypto_sm3tt1a, 0)
+DO_SM3TT(crypto_sm3tt1b, 1)
+DO_SM3TT(crypto_sm3tt2a, 2)
+DO_SM3TT(crypto_sm3tt2b, 3)
+
+#undef DO_SM3TT
+
static uint8_t const sm4_sbox[] = {
0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 81ad287811..0fecc9b06f 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -14902,13 +14902,15 @@ static void disas_crypto_xar(DisasContext *s, uint32_t insn)
*/
static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
{
+ static gen_helper_gvec_3 * const fns[4] = {
+ gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
+ gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
+ };
int opcode = extract32(insn, 10, 2);
int imm2 = extract32(insn, 12, 2);
int rm = extract32(insn, 16, 5);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
- TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
- TCGv_i32 tcg_imm2, tcg_opcode;
if (!dc_isar_feature(aa64_sm3, s)) {
unallocated_encoding(s);
@@ -14919,20 +14921,7 @@ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
return;
}
- tcg_rd_ptr = vec_full_reg_ptr(s, rd);
- tcg_rn_ptr = vec_full_reg_ptr(s, rn);
- tcg_rm_ptr = vec_full_reg_ptr(s, rm);
- tcg_imm2 = tcg_const_i32(imm2);
- tcg_opcode = tcg_const_i32(opcode);
-
- gen_helper_crypto_sm3tt(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr, tcg_imm2,
- tcg_opcode);
-
- tcg_temp_free_ptr(tcg_rd_ptr);
- tcg_temp_free_ptr(tcg_rn_ptr);
- tcg_temp_free_ptr(tcg_rm_ptr);
- tcg_temp_free_i32(tcg_imm2);
- tcg_temp_free_i32(tcg_opcode);
+ gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
}
/* C3.6 Data processing - SIMD, inc Crypto
--
2.20.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v3 7/9] target/arm: Implement SVE2 crypto unary operations
2020-04-30 2:02 [PATCH v3 0/9] target/arm: Implement SVE2 Crypto Extensions Richard Henderson
` (5 preceding siblings ...)
2020-04-30 2:03 ` [PATCH v3 6/9] target/arm: Split helper_crypto_sm3tt Richard Henderson
@ 2020-04-30 2:03 ` Richard Henderson
2020-04-30 2:03 ` [PATCH v3 8/9] target/arm: Implement SVE2 crypto destructive binary operations Richard Henderson
2020-04-30 2:03 ` [PATCH v3 9/9] target/arm: Implement SVE2 crypto constructive " Richard Henderson
8 siblings, 0 replies; 10+ messages in thread
From: Richard Henderson @ 2020-04-30 2:03 UTC (permalink / raw)
To: qemu-devel; +Cc: steplong
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/sve.decode | 6 ++++++
target/arm/translate-sve.c | 14 ++++++++++++++
2 files changed, 20 insertions(+)
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index b73b64c3f2..04985d2bb8 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1551,3 +1551,9 @@ STNT1_zprz 1110010 .. 00 ..... 001 ... ..... ..... \
# SVE2 32-bit scatter non-temporal store (vector plus scalar)
STNT1_zprz 1110010 .. 10 ..... 001 ... ..... ..... \
@rprr_scatter_store xs=0 esz=2 scale=0
+
+### SVE2 Crypto Extensions
+
+# SVE2 crypto unary operations
+# AESMC and AESIMC
+AESMC 01000101 00 10000011100 decrypt:1 00000 rd:5
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 413000df3b..44dd1fe2b0 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -8174,3 +8174,17 @@ static bool trans_USDOT_zzzz(DisasContext *s, arg_USDOT_zzzz *a)
}
return true;
}
+
+static bool trans_AESMC(DisasContext *s, arg_AESMC *a)
+{
+ if (!dc_isar_feature(aa64_sve2_aes, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ unsigned rd = vec_full_reg_offset(s, a->rd);
+ tcg_gen_gvec_2_ool(rd, rd, vsz, vsz, a->decrypt,
+ gen_helper_crypto_aesmc);
+ }
+ return true;
+}
--
2.20.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v3 8/9] target/arm: Implement SVE2 crypto destructive binary operations
2020-04-30 2:02 [PATCH v3 0/9] target/arm: Implement SVE2 Crypto Extensions Richard Henderson
` (6 preceding siblings ...)
2020-04-30 2:03 ` [PATCH v3 7/9] target/arm: Implement SVE2 crypto unary operations Richard Henderson
@ 2020-04-30 2:03 ` Richard Henderson
2020-04-30 2:03 ` [PATCH v3 9/9] target/arm: Implement SVE2 crypto constructive " Richard Henderson
8 siblings, 0 replies; 10+ messages in thread
From: Richard Henderson @ 2020-04-30 2:03 UTC (permalink / raw)
To: qemu-devel; +Cc: steplong
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.h | 5 +++++
target/arm/sve.decode | 7 ++++++
target/arm/translate-sve.c | 45 ++++++++++++++++++++++++++++++++++++++
3 files changed, 57 insertions(+)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 332c67bbb0..4f09dd42ba 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3870,6 +3870,11 @@ static inline bool isar_feature_aa64_sve2_bitperm(const ARMISARegisters *id)
return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BITPERM) != 0;
}
+static inline bool isar_feature_aa64_sve2_sm4(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SM4) != 0;
+}
+
static inline bool isar_feature_aa64_sve2_i8mm(const ARMISARegisters *id)
{
return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, I8MM) != 0;
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 04985d2bb8..149de7949b 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -118,6 +118,8 @@
@pd_pn_pm ........ esz:2 .. rm:4 ....... rn:4 . rd:4 &rrr_esz
@rdn_rm ........ esz:2 ...... ...... rm:5 rd:5 \
&rrr_esz rn=%reg_movprfx
+@rdn_rm_e0 ........ .. ...... ...... rm:5 rd:5 \
+ &rrr_esz rn=%reg_movprfx esz=0
@rdn_sh_i8u ........ esz:2 ...... ...... ..... rd:5 \
&rri_esz rn=%reg_movprfx imm=%sh8_i8u
@rdn_i8u ........ esz:2 ...... ... imm:8 rd:5 \
@@ -1557,3 +1559,8 @@ STNT1_zprz 1110010 .. 10 ..... 001 ... ..... ..... \
# SVE2 crypto unary operations
# AESMC and AESIMC
AESMC 01000101 00 10000011100 decrypt:1 00000 rd:5
+
+# SVE2 crypto destructive binary operations
+AESE 01000101 00 10001 0 11100 0 ..... ..... @rdn_rm_e0
+AESD 01000101 00 10001 0 11100 1 ..... ..... @rdn_rm_e0
+SM4E 01000101 00 10001 1 11100 0 ..... ..... @rdn_rm_e0
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 44dd1fe2b0..91e71882d6 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -8188,3 +8188,48 @@ static bool trans_AESMC(DisasContext *s, arg_AESMC *a)
}
return true;
}
+
+static bool do_aese(DisasContext *s, arg_rrr_esz *a, bool decrypt)
+{
+ if (!dc_isar_feature(aa64_sve2_aes, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm),
+ vsz, vsz, decrypt, gen_helper_crypto_aese);
+ }
+ return true;
+}
+
+static bool trans_AESE(DisasContext *s, arg_rrr_esz *a)
+{
+ return do_aese(s, a, false);
+}
+
+static bool trans_AESD(DisasContext *s, arg_rrr_esz *a)
+{
+ return do_aese(s, a, true);
+}
+
+static bool do_sm4(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
+{
+ if (!dc_isar_feature(aa64_sve2_sm4, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm),
+ vsz, vsz, 0, fn);
+ }
+ return true;
+}
+
+static bool trans_SM4E(DisasContext *s, arg_rrr_esz *a)
+{
+ return do_sm4(s, a, gen_helper_crypto_sm4e);
+}
--
2.20.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v3 9/9] target/arm: Implement SVE2 crypto constructive binary operations
2020-04-30 2:02 [PATCH v3 0/9] target/arm: Implement SVE2 Crypto Extensions Richard Henderson
` (7 preceding siblings ...)
2020-04-30 2:03 ` [PATCH v3 8/9] target/arm: Implement SVE2 crypto destructive binary operations Richard Henderson
@ 2020-04-30 2:03 ` Richard Henderson
8 siblings, 0 replies; 10+ messages in thread
From: Richard Henderson @ 2020-04-30 2:03 UTC (permalink / raw)
To: qemu-devel; +Cc: steplong
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.h | 5 +++++
target/arm/sve.decode | 4 ++++
target/arm/translate-sve.c | 20 ++++++++++++++++++++
3 files changed, 29 insertions(+)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 4f09dd42ba..0a7c68843b 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3870,6 +3870,11 @@ static inline bool isar_feature_aa64_sve2_bitperm(const ARMISARegisters *id)
return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BITPERM) != 0;
}
+static inline bool isar_feature_aa64_sve2_sha3(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SHA3) != 0;
+}
+
static inline bool isar_feature_aa64_sve2_sm4(const ARMISARegisters *id)
{
return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SM4) != 0;
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 149de7949b..3cf824bac5 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1564,3 +1564,7 @@ AESMC 01000101 00 10000011100 decrypt:1 00000 rd:5
AESE 01000101 00 10001 0 11100 0 ..... ..... @rdn_rm_e0
AESD 01000101 00 10001 0 11100 1 ..... ..... @rdn_rm_e0
SM4E 01000101 00 10001 1 11100 0 ..... ..... @rdn_rm_e0
+
+# SVE2 crypto constructive binary operations
+SM4EKEY 01000101 00 1 ..... 11110 0 ..... ..... @rd_rn_rm_e0
+RAX1 01000101 00 1 ..... 11110 1 ..... ..... @rd_rn_rm_e0
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 91e71882d6..a8e57ea5f4 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -8233,3 +8233,23 @@ static bool trans_SM4E(DisasContext *s, arg_rrr_esz *a)
{
return do_sm4(s, a, gen_helper_crypto_sm4e);
}
+
+static bool trans_SM4EKEY(DisasContext *s, arg_rrr_esz *a)
+{
+ return do_sm4(s, a, gen_helper_crypto_sm4ekey);
+}
+
+static bool trans_RAX1(DisasContext *s, arg_rrr_esz *a)
+{
+ if (!dc_isar_feature(aa64_sve2_sha3, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_3(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm),
+ vsz, vsz, &rax1_op);
+ }
+ return true;
+}
--
2.20.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
end of thread, other threads:[~2020-04-30 2:12 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-30 2:02 [PATCH v3 0/9] target/arm: Implement SVE2 Crypto Extensions Richard Henderson
2020-04-30 2:02 ` [PATCH v3 1/9] target/arm: Convert aes and sm4 to gvec helpers Richard Henderson
2020-04-30 2:02 ` [PATCH v3 2/9] target/arm: Convert rax1 " Richard Henderson
2020-04-30 2:02 ` [PATCH v3 3/9] target/arm: Convert sha512 and sm3 " Richard Henderson
2020-04-30 2:02 ` [PATCH v3 4/9] target/arm: Convert sha1 and sha256 " Richard Henderson
2020-04-30 2:02 ` [PATCH v3 5/9] target/arm: Split helper_crypto_sha1_3reg Richard Henderson
2020-04-30 2:03 ` [PATCH v3 6/9] target/arm: Split helper_crypto_sm3tt Richard Henderson
2020-04-30 2:03 ` [PATCH v3 7/9] target/arm: Implement SVE2 crypto unary operations Richard Henderson
2020-04-30 2:03 ` [PATCH v3 8/9] target/arm: Implement SVE2 crypto destructive binary operations Richard Henderson
2020-04-30 2:03 ` [PATCH v3 9/9] target/arm: Implement SVE2 crypto constructive " Richard Henderson
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.