linux-crypto.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
To: linux-crypto@vger.kernel.org
Cc: herbert@gondor.apana.org.au, ebiggers@kernel.org,
	Ard Biesheuvel <ard.biesheuvel@linaro.org>,
	Nick Desaulniers <ndesaulniers@google.com>
Subject: [PATCH RFC 3/3] crypto: arm64/aegis128 - implement plain NEON version
Date: Fri,  2 Aug 2019 18:15:10 +0300	[thread overview]
Message-ID: <20190802151510.17074-4-ard.biesheuvel@linaro.org> (raw)
In-Reply-To: <20190802151510.17074-1-ard.biesheuvel@linaro.org>

Provide the aegis128 SIMD code with a version of the core AES
transform that does not rely on the special AES instructions, but
uses plain NEON instructions instead. This allows the SIMD version
of the aegis128 driver to be used on arm64 systems that do not
implement those instructions (which are not mandatory in the
architecture), such as the Raspberry Pi 3.

Cc: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 crypto/Makefile              |  5 ++
 crypto/aegis128-neon-inner.c | 53 ++++++++++++++++++++
 crypto/aegis128-neon.c       | 16 +++++-
 3 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/crypto/Makefile b/crypto/Makefile
index 99a9fa9087d1..c3760c7616ac 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -99,6 +99,11 @@ aegis128-$(CONFIG_CRYPTO_AEGIS128_SIMD) += aegis128-neon.o aegis128-neon-inner.o
 endif
 ifeq ($(ARCH),arm64)
 CFLAGS_aegis128-neon-inner.o += -ffreestanding -mcpu=generic+crypto
+CFLAGS_aegis128-neon-inner.o += -ffixed-q14 -ffixed-q15
+CFLAGS_aegis128-neon-inner.o += -ffixed-q16 -ffixed-q17 -ffixed-q18 -ffixed-q19
+CFLAGS_aegis128-neon-inner.o += -ffixed-q20 -ffixed-q21 -ffixed-q22 -ffixed-q23
+CFLAGS_aegis128-neon-inner.o += -ffixed-q24 -ffixed-q25 -ffixed-q26 -ffixed-q27
+CFLAGS_aegis128-neon-inner.o += -ffixed-q28 -ffixed-q29 -ffixed-q30 -ffixed-q31
 CFLAGS_REMOVE_aegis128-neon-inner.o += -mgeneral-regs-only
 aegis128-$(CONFIG_CRYPTO_AEGIS128_SIMD) += aegis128-neon.o aegis128-neon-inner.o
 endif
diff --git a/crypto/aegis128-neon-inner.c b/crypto/aegis128-neon-inner.c
index 6aca2f425b6d..7aa4cef3c2de 100644
--- a/crypto/aegis128-neon-inner.c
+++ b/crypto/aegis128-neon-inner.c
@@ -17,6 +17,8 @@
 
 #include <stddef.h>
 
+extern int aegis128_have_aes_insn;
+
 void *memcpy(void *dest, const void *src, size_t n);
 void *memset(void *s, int c, size_t n);
 
@@ -49,6 +51,32 @@ uint8x16_t aegis_aes_round(uint8x16_t w)
 {
 	uint8x16_t z = {};
 
+#ifdef CONFIG_ARM64
+	if (!__builtin_expect(aegis128_have_aes_insn, 1)) {
+		uint8x16_t v;
+
+		// shift rows
+		asm("tbl %0.16b, {%0.16b}, v14.16b" : "+w"(w));
+
+		// sub bytes
+		asm("tbl %0.16b, {v16.16b-v19.16b}, %1.16b" : "=w"(v) : "w"(w));
+		w -= 0x40;
+		asm("tbx %0.16b, {v20.16b-v23.16b}, %1.16b" : "+w"(v) : "w"(w));
+		w -= 0x40;
+		asm("tbx %0.16b, {v24.16b-v27.16b}, %1.16b" : "+w"(v) : "w"(w));
+		w -= 0x40;
+		asm("tbx %0.16b, {v28.16b-v31.16b}, %1.16b" : "+w"(v) : "w"(w));
+
+		// mix columns
+		w = (v << 1) ^ (uint8x16_t)(((int8x16_t)v >> 7) & 0x1b);
+		w ^= (uint8x16_t)vrev32q_u16((uint16x8_t)v);
+		asm("tbl %0.16b, {%1.16b}, v15.16b" : "=w"(v) : "w"(v ^ w));
+		w ^= v;
+
+		return w;
+	}
+#endif
+
 	/*
 	 * We use inline asm here instead of the vaeseq_u8/vaesmcq_u8 intrinsics
 	 * to force the compiler to issue the aese/aesmc instructions in pairs.
@@ -149,3 +177,28 @@ void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
 
 	aegis128_save_state_neon(st, state);
 }
+
+#ifdef CONFIG_ARM64
+void crypto_aegis128_init_neon(void)
+{
+	u64 tmp;
+
+	asm volatile(
+	    "adrp		%0, crypto_aes_sbox		\n\t"
+	    "add		%0, %0, :lo12:crypto_aes_sbox	\n\t"
+	    "mov		v14.16b, %1.16b			\n\t"
+	    "mov		v15.16b, %2.16b			\n\t"
+	    "ld1		{v16.16b-v19.16b}, [%0], #64	\n\t"
+	    "ld1		{v20.16b-v23.16b}, [%0], #64	\n\t"
+	    "ld1		{v24.16b-v27.16b}, [%0], #64	\n\t"
+	    "ld1		{v28.16b-v31.16b}, [%0]		\n\t"
+	    : "=&r"(tmp)
+	    : "w"((uint8x16_t){ // shift rows permutation vector
+			0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3,
+			0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb, }),
+	      "w"((uint8x16_t){ // ror32 permutation vector
+			0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4,
+			0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc,	})
+	);
+}
+#endif
diff --git a/crypto/aegis128-neon.c b/crypto/aegis128-neon.c
index c1c0a1686f67..72f9d48e4963 100644
--- a/crypto/aegis128-neon.c
+++ b/crypto/aegis128-neon.c
@@ -14,14 +14,24 @@ void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src,
 void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
 					unsigned int size);
 
+void crypto_aegis128_init_neon(void);
+
+int aegis128_have_aes_insn __ro_after_init;
+
 bool crypto_aegis128_have_simd(void)
 {
-	return cpu_have_feature(cpu_feature(AES));
+	if (cpu_have_feature(cpu_feature(AES))) {
+		aegis128_have_aes_insn = 1;
+		return true;
+	}
+	return IS_ENABLED(CONFIG_ARM64);
 }
 
 void crypto_aegis128_update_simd(union aegis_block *state, const void *msg)
 {
 	kernel_neon_begin();
+	if (IS_ENABLED(CONFIG_ARM64) && !aegis128_have_aes_insn)
+		crypto_aegis128_init_neon();
 	crypto_aegis128_update_neon(state, msg);
 	kernel_neon_end();
 }
@@ -30,6 +40,8 @@ void crypto_aegis128_encrypt_chunk_simd(union aegis_block *state, u8 *dst,
 					const u8 *src, unsigned int size)
 {
 	kernel_neon_begin();
+	if (IS_ENABLED(CONFIG_ARM64) && !aegis128_have_aes_insn)
+		crypto_aegis128_init_neon();
 	crypto_aegis128_encrypt_chunk_neon(state, dst, src, size);
 	kernel_neon_end();
 }
@@ -38,6 +50,8 @@ void crypto_aegis128_decrypt_chunk_simd(union aegis_block *state, u8 *dst,
 					const u8 *src, unsigned int size)
 {
 	kernel_neon_begin();
+	if (IS_ENABLED(CONFIG_ARM64) && !aegis128_have_aes_insn)
+		crypto_aegis128_init_neon();
 	crypto_aegis128_decrypt_chunk_neon(state, dst, src, size);
 	kernel_neon_end();
 }
-- 
2.17.1


  parent reply	other threads:[~2019-08-02 15:16 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-02 15:15 [PATCH 0/3] crypto: aegis128 followup Ard Biesheuvel
2019-08-02 15:15 ` [PATCH resend 1/3] crypto: aegis128 - add support for SIMD acceleration Ard Biesheuvel
2019-08-02 15:15 ` [PATCH resend 2/3] crypto: aegis128 - provide a SIMD implementation based on NEON intrinsics Ard Biesheuvel
2019-08-02 15:15 ` Ard Biesheuvel [this message]
2019-08-08 22:31   ` [PATCH RFC 3/3] crypto: arm64/aegis128 - implement plain NEON version Nick Desaulniers
2019-08-09 17:20     ` Ard Biesheuvel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190802151510.17074-4-ard.biesheuvel@linaro.org \
    --to=ard.biesheuvel@linaro.org \
    --cc=ebiggers@kernel.org \
    --cc=herbert@gondor.apana.org.au \
    --cc=linux-crypto@vger.kernel.org \
    --cc=ndesaulniers@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).