linux-crypto.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] crypto: aegis128 SIMD improvements
@ 2019-10-11 17:08 Ard Biesheuvel
  2019-10-11 17:08 ` [PATCH 1/2] crypto: aegis128 - avoid function pointers for parameterization Ard Biesheuvel
  2019-10-11 17:08 ` [PATCH 2/2] crypto: aegis128 - duplicate init() and final() hooks in SIMD code Ard Biesheuvel
  0 siblings, 2 replies; 3+ messages in thread
From: Ard Biesheuvel @ 2019-10-11 17:08 UTC (permalink / raw)
  To: linux-crypto; +Cc: linux-arm-kernel, herbert, Ard Biesheuvel, Ondrej Mosnacek

Refactor the aegis128 code to get rid of indirect calls, and implement
SIMD versions of the init() and final() hooks. This results in a ~2x
speedup on ARM Cortex-A57 for ~1500 byte inputs.

Cc: Ondrej Mosnacek <omosnace@redhat.com>

Ard Biesheuvel (2):
  crypto: aegis128 - avoid function pointers for parameterization
  crypto: aegis128 - duplicate init() and final() hooks in SIMD code

 crypto/aegis128-core.c       | 125 ++++++++++----------
 crypto/aegis128-neon-inner.c |  49 ++++++++
 crypto/aegis128-neon.c       |  22 ++++
 3 files changed, 134 insertions(+), 62 deletions(-)

-- 
2.20.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] crypto: aegis128 - avoid function pointers for parameterization
  2019-10-11 17:08 [PATCH 0/2] crypto: aegis128 SIMD improvements Ard Biesheuvel
@ 2019-10-11 17:08 ` Ard Biesheuvel
  2019-10-11 17:08 ` [PATCH 2/2] crypto: aegis128 - duplicate init() and final() hooks in SIMD code Ard Biesheuvel
  1 sibling, 0 replies; 3+ messages in thread
From: Ard Biesheuvel @ 2019-10-11 17:08 UTC (permalink / raw)
  To: linux-crypto; +Cc: linux-arm-kernel, herbert, Ard Biesheuvel

Instead of passing around an ops structure with function pointers,
which forces indirect calls to be used, refactor the code slightly
so we can use ordinary function calls. At the same time, switch to
a static key to decide whether or not the SIMD code path may be used.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 crypto/aegis128-core.c | 105 +++++++++-----------
 1 file changed, 46 insertions(+), 59 deletions(-)

diff --git a/crypto/aegis128-core.c b/crypto/aegis128-core.c
index 80e73611bd5c..fe7ab66dd8f9 100644
--- a/crypto/aegis128-core.c
+++ b/crypto/aegis128-core.c
@@ -13,6 +13,7 @@
 #include <crypto/scatterwalk.h>
 #include <linux/err.h>
 #include <linux/init.h>
+#include <linux/jump_label.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/scatterlist.h>
@@ -35,15 +36,7 @@ struct aegis_ctx {
 	union aegis_block key;
 };
 
-struct aegis128_ops {
-	int (*skcipher_walk_init)(struct skcipher_walk *walk,
-				  struct aead_request *req, bool atomic);
-
-	void (*crypt_chunk)(struct aegis_state *state, u8 *dst,
-			    const u8 *src, unsigned int size);
-};
-
-static bool have_simd;
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_simd);
 
 static const union aegis_block crypto_aegis_const[2] = {
 	{ .words64 = {
@@ -59,7 +52,7 @@ static const union aegis_block crypto_aegis_const[2] = {
 static bool aegis128_do_simd(void)
 {
 #ifdef CONFIG_CRYPTO_AEGIS128_SIMD
-	if (have_simd)
+	if (static_branch_likely(&have_simd))
 		return crypto_simd_usable();
 #endif
 	return false;
@@ -323,25 +316,27 @@ static void crypto_aegis128_process_ad(struct aegis_state *state,
 	}
 }
 
-static void crypto_aegis128_process_crypt(struct aegis_state *state,
-					  struct aead_request *req,
-					  const struct aegis128_ops *ops)
+static __always_inline
+int crypto_aegis128_process_crypt(struct aegis_state *state,
+				  struct aead_request *req,
+				  struct skcipher_walk *walk,
+				  void (*crypt)(struct aegis_state *state,
+					        u8 *dst, const u8 *src,
+					        unsigned int size))
 {
-	struct skcipher_walk walk;
+	int err = 0;
 
-	ops->skcipher_walk_init(&walk, req, false);
+	while (walk->nbytes) {
+		unsigned int nbytes = walk->nbytes;
 
-	while (walk.nbytes) {
-		unsigned int nbytes = walk.nbytes;
+		if (nbytes < walk->total)
+			nbytes = round_down(nbytes, walk->stride);
 
-		if (nbytes < walk.total)
-			nbytes = round_down(nbytes, walk.stride);
+		crypt(state, walk->dst.virt.addr, walk->src.virt.addr, nbytes);
 
-		ops->crypt_chunk(state, walk.dst.virt.addr, walk.src.virt.addr,
-				 nbytes);
-
-		skcipher_walk_done(&walk, walk.nbytes - nbytes);
+		err = skcipher_walk_done(walk, walk->nbytes - nbytes);
 	}
+	return err;
 }
 
 static void crypto_aegis128_final(struct aegis_state *state,
@@ -390,39 +385,27 @@ static int crypto_aegis128_setauthsize(struct crypto_aead *tfm,
 	return 0;
 }
 
-static void crypto_aegis128_crypt(struct aead_request *req,
-				  union aegis_block *tag_xor,
-				  unsigned int cryptlen,
-				  const struct aegis128_ops *ops)
+static int crypto_aegis128_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	union aegis_block tag = {};
+	unsigned int authsize = crypto_aead_authsize(tfm);
 	struct aegis_ctx *ctx = crypto_aead_ctx(tfm);
+	unsigned int cryptlen = req->cryptlen;
+	struct skcipher_walk walk;
 	struct aegis_state state;
 
 	crypto_aegis128_init(&state, &ctx->key, req->iv);
 	crypto_aegis128_process_ad(&state, req->src, req->assoclen);
-	crypto_aegis128_process_crypt(&state, req, ops);
-	crypto_aegis128_final(&state, tag_xor, req->assoclen, cryptlen);
-}
-
-static int crypto_aegis128_encrypt(struct aead_request *req)
-{
-	const struct aegis128_ops *ops = &(struct aegis128_ops){
-		.skcipher_walk_init = skcipher_walk_aead_encrypt,
-		.crypt_chunk = crypto_aegis128_encrypt_chunk,
-	};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	union aegis_block tag = {};
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen;
 
+	skcipher_walk_aead_encrypt(&walk, req, false);
 	if (aegis128_do_simd())
-		ops = &(struct aegis128_ops){
-			.skcipher_walk_init = skcipher_walk_aead_encrypt,
-			.crypt_chunk = crypto_aegis128_encrypt_chunk_simd };
-
-	crypto_aegis128_crypt(req, &tag, cryptlen, ops);
+		crypto_aegis128_process_crypt(&state, req, &walk,
+					      crypto_aegis128_encrypt_chunk_simd);
+	else
+		crypto_aegis128_process_crypt(&state, req, &walk,
+					      crypto_aegis128_encrypt_chunk);
+	crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
 
 	scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen,
 				 authsize, 1);
@@ -431,26 +414,29 @@ static int crypto_aegis128_encrypt(struct aead_request *req)
 
 static int crypto_aegis128_decrypt(struct aead_request *req)
 {
-	const struct aegis128_ops *ops = &(struct aegis128_ops){
-		.skcipher_walk_init = skcipher_walk_aead_decrypt,
-		.crypt_chunk = crypto_aegis128_decrypt_chunk,
-	};
 	static const u8 zeros[AEGIS128_MAX_AUTH_SIZE] = {};
-
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	union aegis_block tag;
 	unsigned int authsize = crypto_aead_authsize(tfm);
 	unsigned int cryptlen = req->cryptlen - authsize;
+	struct aegis_ctx *ctx = crypto_aead_ctx(tfm);
+	struct skcipher_walk walk;
+	struct aegis_state state;
 
 	scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen,
 				 authsize, 0);
 
-	if (aegis128_do_simd())
-		ops = &(struct aegis128_ops){
-			.skcipher_walk_init = skcipher_walk_aead_decrypt,
-			.crypt_chunk = crypto_aegis128_decrypt_chunk_simd };
+	crypto_aegis128_init(&state, &ctx->key, req->iv);
+	crypto_aegis128_process_ad(&state, req->src, req->assoclen);
 
-	crypto_aegis128_crypt(req, &tag, cryptlen, ops);
+	skcipher_walk_aead_decrypt(&walk, req, false);
+	if (aegis128_do_simd())
+		crypto_aegis128_process_crypt(&state, req, &walk,
+					      crypto_aegis128_decrypt_chunk_simd);
+	else
+		crypto_aegis128_process_crypt(&state, req, &walk,
+					      crypto_aegis128_decrypt_chunk);
+	crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
 
 	return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
 }
@@ -481,8 +467,9 @@ static struct aead_alg crypto_aegis128_alg = {
 
 static int __init crypto_aegis128_module_init(void)
 {
-	if (IS_ENABLED(CONFIG_CRYPTO_AEGIS128_SIMD))
-		have_simd = crypto_aegis128_have_simd();
+	if (IS_ENABLED(CONFIG_CRYPTO_AEGIS128_SIMD) &&
+	    crypto_aegis128_have_simd())
+		static_branch_enable(&have_simd);
 
 	return crypto_register_aead(&crypto_aegis128_alg);
 }
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] crypto: aegis128 - duplicate init() and final() hooks in SIMD code
  2019-10-11 17:08 [PATCH 0/2] crypto: aegis128 SIMD improvements Ard Biesheuvel
  2019-10-11 17:08 ` [PATCH 1/2] crypto: aegis128 - avoid function pointers for parameterization Ard Biesheuvel
@ 2019-10-11 17:08 ` Ard Biesheuvel
  1 sibling, 0 replies; 3+ messages in thread
From: Ard Biesheuvel @ 2019-10-11 17:08 UTC (permalink / raw)
  To: linux-crypto; +Cc: linux-arm-kernel, herbert, Ard Biesheuvel

In order to speed up aegis128 processing even more, duplicate the init()
and final() routines as SIMD versions in their entirety. This results
in a 2x speedup on ARM Cortex-A57 for ~1500 byte inputs (using AES
instructions).

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 crypto/aegis128-core.c       | 38 ++++++++++-----
 crypto/aegis128-neon-inner.c | 49 ++++++++++++++++++++
 crypto/aegis128-neon.c       | 22 +++++++++
 3 files changed, 97 insertions(+), 12 deletions(-)

diff --git a/crypto/aegis128-core.c b/crypto/aegis128-core.c
index fe7ab66dd8f9..71c11cb5bad1 100644
--- a/crypto/aegis128-core.c
+++ b/crypto/aegis128-core.c
@@ -60,10 +60,16 @@ static bool aegis128_do_simd(void)
 
 bool crypto_aegis128_have_simd(void);
 void crypto_aegis128_update_simd(struct aegis_state *state, const void *msg);
+void crypto_aegis128_init_simd(struct aegis_state *state,
+			       const union aegis_block *key,
+			       const u8 *iv);
 void crypto_aegis128_encrypt_chunk_simd(struct aegis_state *state, u8 *dst,
 					const u8 *src, unsigned int size);
 void crypto_aegis128_decrypt_chunk_simd(struct aegis_state *state, u8 *dst,
 					const u8 *src, unsigned int size);
+void crypto_aegis128_final_simd(struct aegis_state *state,
+				union aegis_block *tag_xor,
+				u64 assoclen, u64 cryptlen);
 
 static void crypto_aegis128_update(struct aegis_state *state)
 {
@@ -395,17 +401,21 @@ static int crypto_aegis128_encrypt(struct aead_request *req)
 	struct skcipher_walk walk;
 	struct aegis_state state;
 
-	crypto_aegis128_init(&state, &ctx->key, req->iv);
-	crypto_aegis128_process_ad(&state, req->src, req->assoclen);
-
 	skcipher_walk_aead_encrypt(&walk, req, false);
-	if (aegis128_do_simd())
+	if (aegis128_do_simd()) {
+		crypto_aegis128_init_simd(&state, &ctx->key, req->iv);
+		crypto_aegis128_process_ad(&state, req->src, req->assoclen);
 		crypto_aegis128_process_crypt(&state, req, &walk,
 					      crypto_aegis128_encrypt_chunk_simd);
-	else
+		crypto_aegis128_final_simd(&state, &tag, req->assoclen,
+					   cryptlen);
+	} else {
+		crypto_aegis128_init(&state, &ctx->key, req->iv);
+		crypto_aegis128_process_ad(&state, req->src, req->assoclen);
 		crypto_aegis128_process_crypt(&state, req, &walk,
 					      crypto_aegis128_encrypt_chunk);
-	crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
+		crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
+	}
 
 	scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen,
 				 authsize, 1);
@@ -426,17 +436,21 @@ static int crypto_aegis128_decrypt(struct aead_request *req)
 	scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen,
 				 authsize, 0);
 
-	crypto_aegis128_init(&state, &ctx->key, req->iv);
-	crypto_aegis128_process_ad(&state, req->src, req->assoclen);
-
 	skcipher_walk_aead_decrypt(&walk, req, false);
-	if (aegis128_do_simd())
+	if (aegis128_do_simd()) {
+		crypto_aegis128_init_simd(&state, &ctx->key, req->iv);
+		crypto_aegis128_process_ad(&state, req->src, req->assoclen);
 		crypto_aegis128_process_crypt(&state, req, &walk,
 					      crypto_aegis128_decrypt_chunk_simd);
-	else
+		crypto_aegis128_final_simd(&state, &tag, req->assoclen,
+					   cryptlen);
+	} else {
+		crypto_aegis128_init(&state, &ctx->key, req->iv);
+		crypto_aegis128_process_ad(&state, req->src, req->assoclen);
 		crypto_aegis128_process_crypt(&state, req, &walk,
 					      crypto_aegis128_decrypt_chunk);
-	crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
+		crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
+	}
 
 	return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
 }
diff --git a/crypto/aegis128-neon-inner.c b/crypto/aegis128-neon-inner.c
index f05310ca22aa..16d2e806eb47 100644
--- a/crypto/aegis128-neon-inner.c
+++ b/crypto/aegis128-neon-inner.c
@@ -132,6 +132,34 @@ void preload_sbox(void)
 	    :: "r"(crypto_aes_sbox));
 }
 
+void crypto_aegis128_init_neon(void *state, const void *key, const void *iv)
+{
+	static const uint8_t const0[] = {
+		0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d,
+		0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62,
+	};
+	static const uint8_t const1[] = {
+		0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1,
+		0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd,
+	};
+	uint8x16_t k = vld1q_u8(key);
+	uint8x16_t kiv = k ^ vld1q_u8(iv);
+	struct aegis128_state st = {{
+		kiv,
+		vld1q_u8(const1),
+		vld1q_u8(const0),
+		k ^ vld1q_u8(const0),
+		k ^ vld1q_u8(const1),
+	}};
+	int i;
+
+	for (i = 0; i < 5; i++) {
+		st = aegis128_update_neon(st, k);
+		st = aegis128_update_neon(st, kiv);
+	}
+	aegis128_save_state_neon(st, state);
+}
+
 void crypto_aegis128_update_neon(void *state, const void *msg)
 {
 	struct aegis128_state st = aegis128_load_state_neon(state);
@@ -210,3 +238,24 @@ void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
 
 	aegis128_save_state_neon(st, state);
 }
+
+void crypto_aegis128_final_neon(void *state, void *tag_xor,
+				uint64_t assocbits, uint64_t cryptbits)
+{
+	struct aegis128_state st = aegis128_load_state_neon(state);
+	union {
+		uint64_t	pair[2];
+		uint8x16_t	v;
+	} t = {{ assocbits, cryptbits }};
+	uint8x16_t tag;
+	int i;
+
+	t.v ^= st.v[3];
+
+	for (i = 0; i < 7; i++)
+		st = aegis128_update_neon(st, t.v);
+
+	tag = vld1q_u8(tag_xor);
+	tag ^= st.v[0] ^ st.v[1] ^ st.v[2] ^ st.v[3] ^ st.v[4];
+	vst1q_u8(tag_xor, tag);
+}
diff --git a/crypto/aegis128-neon.c b/crypto/aegis128-neon.c
index 751f9c195aa4..5b37b04b8ad4 100644
--- a/crypto/aegis128-neon.c
+++ b/crypto/aegis128-neon.c
@@ -8,11 +8,14 @@
 
 #include "aegis.h"
 
+void crypto_aegis128_init_neon(void *state, const void *key, const u8 *iv);
 void crypto_aegis128_update_neon(void *state, const void *msg);
 void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src,
 					unsigned int size);
 void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
 					unsigned int size);
+void crypto_aegis128_final_neon(void *state, void *tag_xor,
+				uint64_t assocbits, uint64_t cryptbits);
 
 int aegis128_have_aes_insn __ro_after_init;
 
@@ -25,6 +28,15 @@ bool crypto_aegis128_have_simd(void)
 	return IS_ENABLED(CONFIG_ARM64);
 }
 
+void crypto_aegis128_init_simd(union aegis_block *state,
+			       const union aegis_block *key,
+			       const u8 *iv)
+{
+	kernel_neon_begin();
+	crypto_aegis128_init_neon(state, key, iv);
+	kernel_neon_end();
+}
+
 void crypto_aegis128_update_simd(union aegis_block *state, const void *msg)
 {
 	kernel_neon_begin();
@@ -47,3 +59,13 @@ void crypto_aegis128_decrypt_chunk_simd(union aegis_block *state, u8 *dst,
 	crypto_aegis128_decrypt_chunk_neon(state, dst, src, size);
 	kernel_neon_end();
 }
+
+void crypto_aegis128_final_simd(union aegis_block *state,
+				union aegis_block *tag_xor,
+				u64 assoclen, u64 cryptlen)
+{
+	kernel_neon_begin();
+	crypto_aegis128_final_neon(state, tag_xor, cpu_to_le64(8 * assoclen),
+				   cpu_to_le64(8 * cryptlen));
+	kernel_neon_end();
+}
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-10-11 17:09 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-11 17:08 [PATCH 0/2] crypto: aegis128 SIMD improvements Ard Biesheuvel
2019-10-11 17:08 ` [PATCH 1/2] crypto: aegis128 - avoid function pointers for parameterization Ard Biesheuvel
2019-10-11 17:08 ` [PATCH 2/2] crypto: aegis128 - duplicate init() and final() hooks in SIMD code Ard Biesheuvel

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).