Linux-Crypto Archive on lore.kernel.org
* [PATCH 0/5] crypto: clean up ARM/arm64 glue code for GHASH and GCM
@ 2020-06-29  7:39 Ard Biesheuvel
  2020-06-29  7:39 ` [PATCH 1/5] crypto: arm64/ghash - drop PMULL based shash Ard Biesheuvel
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Ard Biesheuvel @ 2020-06-29  7:39 UTC (permalink / raw)
  To: linux-crypto; +Cc: herbert, Ard Biesheuvel

Get rid of pointless indirect calls whose target is decided at boot and
never changes afterwards. Also, make the size of the key struct variable,
so that the precomputed powers of H needed for aggregation are only
carried by the versions of the algorithm that actually use them.
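
As a sketch of where the series ends up (condensed from patches 3 and 5,
not a literal quote of either), the fixed slots for the powers of H turn
into a flexible array member, and each user registers only the context
size it actually needs:

	struct ghash_key {
		be128	k;		/* needed for the non-SIMD fallback */
		u64	h[][2];		/* reflected powers of H */
	};

	/* plain ghash shash: only H itself */
	.base.cra_ctxsize = sizeof(struct ghash_key) + sizeof(u64[2]),

	/* GCM AEAD with aggregation: H, H^2, H^3, H^4 */
	.base.cra_ctxsize = sizeof(struct gcm_aes_ctx) + 4 * sizeof(u64[2]),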

Ard Biesheuvel (5):
  crypto: arm64/ghash - drop PMULL based shash
  crypto: arm64/gcm - disentangle ghash and gcm setkey() routines
  crypto: arm64/gcm - use variably sized key struct
  crypto: arm64/gcm - use inline helper to suppress indirect calls
  crypto: arm/ghash - use variably sized key struct

 arch/arm/crypto/ghash-ce-glue.c   |  51 ++--
 arch/arm64/crypto/ghash-ce-glue.c | 257 +++++++-------------
 2 files changed, 118 insertions(+), 190 deletions(-)

-- 
2.20.1


* [PATCH 1/5] crypto: arm64/ghash - drop PMULL based shash
  2020-06-29  7:39 [PATCH 0/5] crypto: clean up ARM/arm64 glue code for GHASH and GCM Ard Biesheuvel
@ 2020-06-29  7:39 ` Ard Biesheuvel
  2020-06-29  7:39 ` [PATCH 2/5] crypto: arm64/gcm - disentangle ghash and gcm setkey() routines Ard Biesheuvel
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Ard Biesheuvel @ 2020-06-29  7:39 UTC (permalink / raw)
  To: linux-crypto; +Cc: herbert, Ard Biesheuvel

There are two ways to implement SIMD accelerated GCM on arm64:
- using the PMULL instructions for carryless 64x64->128 bit multiplication,
  in which case the architecture guarantees that the AES instructions are
  available as well, and so we can use the AEAD implementation that combines
  both,
- using the PMULL instructions for carryless 8x8->16 bit multiplication,
  which is implemented as a shash, and can be combined with any ctr(aes)
  implementation by the generic GCM AEAD template driver.

So let's drop the 64x64->128 bit PMULL based shash driver, which is never
needed for GCM, and is not suitable for use anywhere else.
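
With that, the registration logic at module init reduces to a single
choice (condensed from the hunk below):

	if (cpu_have_named_feature(PMULL))
		return crypto_register_aead(&gcm_aes_alg);

	return crypto_register_shash(&ghash_alg);

On PMULL capable cores a "gcm(aes)" request resolves to the combined
AEAD; elsewhere, the crypto API instantiates the generic GCM template
around the remaining ghash shash and whatever ctr(aes) implementation
is available.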

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/crypto/ghash-ce-glue.c | 90 +++-----------------
 1 file changed, 12 insertions(+), 78 deletions(-)

diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 22831d3b7f62..be63d8b5152c 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -113,12 +113,8 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src,
 /* avoid hogging the CPU for too long */
 #define MAX_BLOCKS	(SZ_64K / GHASH_BLOCK_SIZE)
 
-static int __ghash_update(struct shash_desc *desc, const u8 *src,
-			  unsigned int len,
-			  void (*simd_update)(int blocks, u64 dg[],
-					      const char *src,
-					      struct ghash_key const *k,
-					      const char *head))
+static int ghash_update(struct shash_desc *desc, const u8 *src,
+			unsigned int len)
 {
 	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
 	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
@@ -145,7 +141,7 @@ static int __ghash_update(struct shash_desc *desc, const u8 *src,
 
 			ghash_do_update(chunk, ctx->digest, src, key,
 					partial ? ctx->buf : NULL,
-					simd_update);
+					pmull_ghash_update_p8);
 
 			blocks -= chunk;
 			src += chunk * GHASH_BLOCK_SIZE;
@@ -157,19 +153,7 @@ static int __ghash_update(struct shash_desc *desc, const u8 *src,
 	return 0;
 }
 
-static int ghash_update_p8(struct shash_desc *desc, const u8 *src,
-			   unsigned int len)
-{
-	return __ghash_update(desc, src, len, pmull_ghash_update_p8);
-}
-
-static int ghash_update_p64(struct shash_desc *desc, const u8 *src,
-			    unsigned int len)
-{
-	return __ghash_update(desc, src, len, pmull_ghash_update_p64);
-}
-
-static int ghash_final_p8(struct shash_desc *desc, u8 *dst)
+static int ghash_final(struct shash_desc *desc, u8 *dst)
 {
 	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
 	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
@@ -189,26 +173,6 @@ static int ghash_final_p8(struct shash_desc *desc, u8 *dst)
 	return 0;
 }
 
-static int ghash_final_p64(struct shash_desc *desc, u8 *dst)
-{
-	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
-	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
-
-	if (partial) {
-		struct ghash_key *key = crypto_shash_ctx(desc->tfm);
-
-		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
-
-		ghash_do_update(1, ctx->digest, ctx->buf, key, NULL,
-				pmull_ghash_update_p64);
-	}
-	put_unaligned_be64(ctx->digest[1], dst);
-	put_unaligned_be64(ctx->digest[0], dst + 8);
-
-	*ctx = (struct ghash_desc_ctx){};
-	return 0;
-}
-
 static void ghash_reflect(u64 h[], const be128 *k)
 {
 	u64 carry = be64_to_cpu(k->a) & BIT(63) ? 1 : 0;
@@ -254,7 +218,7 @@ static int ghash_setkey(struct crypto_shash *tfm,
 	return __ghash_setkey(key, inkey, keylen);
 }
 
-static struct shash_alg ghash_alg[] = {{
+static struct shash_alg ghash_alg = {
 	.base.cra_name		= "ghash",
 	.base.cra_driver_name	= "ghash-neon",
 	.base.cra_priority	= 150,
@@ -264,25 +228,11 @@ static struct shash_alg ghash_alg[] = {{
 
 	.digestsize		= GHASH_DIGEST_SIZE,
 	.init			= ghash_init,
-	.update			= ghash_update_p8,
-	.final			= ghash_final_p8,
-	.setkey			= ghash_setkey,
-	.descsize		= sizeof(struct ghash_desc_ctx),
-}, {
-	.base.cra_name		= "ghash",
-	.base.cra_driver_name	= "ghash-ce",
-	.base.cra_priority	= 200,
-	.base.cra_blocksize	= GHASH_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct ghash_key),
-	.base.cra_module	= THIS_MODULE,
-
-	.digestsize		= GHASH_DIGEST_SIZE,
-	.init			= ghash_init,
-	.update			= ghash_update_p64,
-	.final			= ghash_final_p64,
+	.update			= ghash_update,
+	.final			= ghash_final,
 	.setkey			= ghash_setkey,
 	.descsize		= sizeof(struct ghash_desc_ctx),
-}};
+};
 
 static int num_rounds(struct crypto_aes_ctx *ctx)
 {
@@ -641,37 +591,21 @@ static struct aead_alg gcm_aes_alg = {
 
 static int __init ghash_ce_mod_init(void)
 {
-	int ret;
-
 	if (!cpu_have_named_feature(ASIMD))
 		return -ENODEV;
 
 	if (cpu_have_named_feature(PMULL))
-		ret = crypto_register_shashes(ghash_alg,
-					      ARRAY_SIZE(ghash_alg));
-	else
-		/* only register the first array element */
-		ret = crypto_register_shash(ghash_alg);
+		return crypto_register_aead(&gcm_aes_alg);
 
-	if (ret)
-		return ret;
-
-	if (cpu_have_named_feature(PMULL)) {
-		ret = crypto_register_aead(&gcm_aes_alg);
-		if (ret)
-			crypto_unregister_shashes(ghash_alg,
-						  ARRAY_SIZE(ghash_alg));
-	}
-	return ret;
+	return crypto_register_shash(&ghash_alg);
 }
 
 static void __exit ghash_ce_mod_exit(void)
 {
 	if (cpu_have_named_feature(PMULL))
-		crypto_unregister_shashes(ghash_alg, ARRAY_SIZE(ghash_alg));
+		crypto_unregister_aead(&gcm_aes_alg);
 	else
-		crypto_unregister_shash(ghash_alg);
-	crypto_unregister_aead(&gcm_aes_alg);
+		crypto_unregister_shash(&ghash_alg);
 }
 
 static const struct cpu_feature ghash_cpu_feature[] = {
-- 
2.20.1


* [PATCH 2/5] crypto: arm64/gcm - disentangle ghash and gcm setkey() routines
  2020-06-29  7:39 [PATCH 0/5] crypto: clean up ARM/arm64 glue code for GHASH and GCM Ard Biesheuvel
  2020-06-29  7:39 ` [PATCH 1/5] crypto: arm64/ghash - drop PMULL based shash Ard Biesheuvel
@ 2020-06-29  7:39 ` Ard Biesheuvel
  2020-06-29  7:39 ` [PATCH 3/5] crypto: arm64/gcm - use variably sized key struct Ard Biesheuvel
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Ard Biesheuvel @ 2020-06-29  7:39 UTC (permalink / raw)
  To: linux-crypto; +Cc: herbert, Ard Biesheuvel

The remaining ghash implementation does not support aggregation, and so
there is no point in including the precomputed powers of H in its key
struct. So move the computation of those powers into the GCM setkey
routine, and get rid of the shared sub-routine entirely.
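
For context (not part of this patch): aggregation unrolls the GHASH
recurrence Y_i = (Y_{i-1} ^ X_i) * H over four blocks, i.e.,

	Y_4 = (Y_0 ^ X_1)*H^4 ^ X_2*H^3 ^ X_3*H^2 ^ X_4*H

which makes the four multiplications independent of one another. This
is why the GCM code wants H^2..H^4 precomputed at setkey time, while
the plain shash, which folds one block at a time, only ever needs H.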

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/crypto/ghash-ce-glue.c | 47 +++++++++-----------
 1 file changed, 22 insertions(+), 25 deletions(-)

diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index be63d8b5152c..921fa69b5ded 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -184,29 +184,6 @@ static void ghash_reflect(u64 h[], const be128 *k)
 		h[1] ^= 0xc200000000000000UL;
 }
 
-static int __ghash_setkey(struct ghash_key *key,
-			  const u8 *inkey, unsigned int keylen)
-{
-	be128 h;
-
-	/* needed for the fallback */
-	memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
-
-	ghash_reflect(key->h, &key->k);
-
-	h = key->k;
-	gf128mul_lle(&h, &key->k);
-	ghash_reflect(key->h2, &h);
-
-	gf128mul_lle(&h, &key->k);
-	ghash_reflect(key->h3, &h);
-
-	gf128mul_lle(&h, &key->k);
-	ghash_reflect(key->h4, &h);
-
-	return 0;
-}
-
 static int ghash_setkey(struct crypto_shash *tfm,
 			const u8 *inkey, unsigned int keylen)
 {
@@ -215,7 +192,11 @@ static int ghash_setkey(struct crypto_shash *tfm,
 	if (keylen != GHASH_BLOCK_SIZE)
 		return -EINVAL;
 
-	return __ghash_setkey(key, inkey, keylen);
+	/* needed for the fallback */
+	memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
+
+	ghash_reflect(key->h, &key->k);
+	return 0;
 }
 
 static struct shash_alg ghash_alg = {
@@ -251,6 +232,7 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
 {
 	struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
 	u8 key[GHASH_BLOCK_SIZE];
+	be128 h;
 	int ret;
 
 	ret = aes_expandkey(&ctx->aes_key, inkey, keylen);
@@ -259,7 +241,22 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
 
 	aes_encrypt(&ctx->aes_key, key, (u8[AES_BLOCK_SIZE]){});
 
-	return __ghash_setkey(&ctx->ghash_key, key, sizeof(be128));
+	/* needed for the fallback */
+	memcpy(&ctx->ghash_key.k, key, GHASH_BLOCK_SIZE);
+
+	ghash_reflect(ctx->ghash_key.h, &ctx->ghash_key.k);
+
+	h = ctx->ghash_key.k;
+	gf128mul_lle(&h, &ctx->ghash_key.k);
+	ghash_reflect(ctx->ghash_key.h2, &h);
+
+	gf128mul_lle(&h, &ctx->ghash_key.k);
+	ghash_reflect(ctx->ghash_key.h3, &h);
+
+	gf128mul_lle(&h, &ctx->ghash_key.k);
+	ghash_reflect(ctx->ghash_key.h4, &h);
+
+	return 0;
 }
 
 static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
-- 
2.20.1


* [PATCH 3/5] crypto: arm64/gcm - use variably sized key struct
  2020-06-29  7:39 [PATCH 0/5] crypto: clean up ARM/arm64 glue code for GHASH and GCM Ard Biesheuvel
  2020-06-29  7:39 ` [PATCH 1/5] crypto: arm64/ghash - drop PMULL based shash Ard Biesheuvel
  2020-06-29  7:39 ` [PATCH 2/5] crypto: arm64/gcm - disentangle ghash and gcm setkey() routines Ard Biesheuvel
@ 2020-06-29  7:39 ` Ard Biesheuvel
  2020-06-29  7:39 ` [PATCH 4/5] crypto: arm64/gcm - use inline helper to suppress indirect calls Ard Biesheuvel
  2020-06-29  7:39 ` [PATCH 5/5] crypto: arm/ghash - use variably sized key struct Ard Biesheuvel
  4 siblings, 0 replies; 6+ messages in thread
From: Ard Biesheuvel @ 2020-06-29  7:39 UTC (permalink / raw)
  To: linux-crypto; +Cc: herbert, Ard Biesheuvel

Now that the ghash and gcm drivers are split, we no longer need to
allocate a key struct for the former that carries the powers of H used
only by the latter. Also, take this opportunity to clean up the code a
little.
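
Note that the asm prototypes (see the hunk below) now take the array of
reflected powers directly rather than the key struct, e.g.

	asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[],
					       const char *src,
					       u64 const h[][2],
					       const char *head);

so the assembly no longer depends on the layout of struct ghash_key,
and the flexible array member can live at its end.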

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/crypto/ghash-ce-glue.c | 49 +++++++++-----------
 1 file changed, 21 insertions(+), 28 deletions(-)

diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 921fa69b5ded..2ae95dcf648f 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -31,12 +31,8 @@ MODULE_ALIAS_CRYPTO("ghash");
 #define GCM_IV_SIZE		12
 
 struct ghash_key {
-	u64			h[2];
-	u64			h2[2];
-	u64			h3[2];
-	u64			h4[2];
-
 	be128			k;
+	u64			h[][2];
 };
 
 struct ghash_desc_ctx {
@@ -51,22 +47,18 @@ struct gcm_aes_ctx {
 };
 
 asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
-				       struct ghash_key const *k,
-				       const char *head);
+				       u64 const h[][2], const char *head);
 
 asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
-				      struct ghash_key const *k,
-				      const char *head);
+				      u64 const h[][2], const char *head);
 
 asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[],
-				  struct ghash_key const *k, u64 dg[],
-				  u8 ctr[], u32 const rk[], int rounds,
-				  u8 tag[]);
+				  u64 const h[][2], u64 dg[], u8 ctr[],
+				  u32 const rk[], int rounds, u8 tag[]);
 
 asmlinkage void pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
-				  struct ghash_key const *k, u64 dg[],
-				  u8 ctr[], u32 const rk[], int rounds,
-				  u8 tag[]);
+				  u64 const h[][2], u64 dg[], u8 ctr[],
+				  u32 const rk[], int rounds, u8 tag[]);
 
 static int ghash_init(struct shash_desc *desc)
 {
@@ -80,12 +72,12 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src,
 			    struct ghash_key *key, const char *head,
 			    void (*simd_update)(int blocks, u64 dg[],
 						const char *src,
-						struct ghash_key const *k,
+						u64 const h[][2],
 						const char *head))
 {
 	if (likely(crypto_simd_usable() && simd_update)) {
 		kernel_neon_begin();
-		simd_update(blocks, dg, src, key, head);
+		simd_update(blocks, dg, src, key->h, head);
 		kernel_neon_end();
 	} else {
 		be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
@@ -195,7 +187,7 @@ static int ghash_setkey(struct crypto_shash *tfm,
 	/* needed for the fallback */
 	memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
 
-	ghash_reflect(key->h, &key->k);
+	ghash_reflect(key->h[0], &key->k);
 	return 0;
 }
 
@@ -204,7 +196,7 @@ static struct shash_alg ghash_alg = {
 	.base.cra_driver_name	= "ghash-neon",
 	.base.cra_priority	= 150,
 	.base.cra_blocksize	= GHASH_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct ghash_key),
+	.base.cra_ctxsize	= sizeof(struct ghash_key) + sizeof(u64[2]),
 	.base.cra_module	= THIS_MODULE,
 
 	.digestsize		= GHASH_DIGEST_SIZE,
@@ -244,17 +236,17 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
 	/* needed for the fallback */
 	memcpy(&ctx->ghash_key.k, key, GHASH_BLOCK_SIZE);
 
-	ghash_reflect(ctx->ghash_key.h, &ctx->ghash_key.k);
+	ghash_reflect(ctx->ghash_key.h[0], &ctx->ghash_key.k);
 
 	h = ctx->ghash_key.k;
 	gf128mul_lle(&h, &ctx->ghash_key.k);
-	ghash_reflect(ctx->ghash_key.h2, &h);
+	ghash_reflect(ctx->ghash_key.h[1], &h);
 
 	gf128mul_lle(&h, &ctx->ghash_key.k);
-	ghash_reflect(ctx->ghash_key.h3, &h);
+	ghash_reflect(ctx->ghash_key.h[2], &h);
 
 	gf128mul_lle(&h, &ctx->ghash_key.k);
-	ghash_reflect(ctx->ghash_key.h4, &h);
+	ghash_reflect(ctx->ghash_key.h[3], &h);
 
 	return 0;
 }
@@ -380,8 +372,8 @@ static int gcm_encrypt(struct aead_request *req)
 			}
 
 			kernel_neon_begin();
-			pmull_gcm_encrypt(nbytes, dst, src, &ctx->ghash_key, dg,
-					  iv, ctx->aes_key.key_enc, nrounds,
+			pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h,
+					  dg, iv, ctx->aes_key.key_enc, nrounds,
 					  tag);
 			kernel_neon_end();
 
@@ -494,8 +486,8 @@ static int gcm_decrypt(struct aead_request *req)
 			}
 
 			kernel_neon_begin();
-			pmull_gcm_decrypt(nbytes, dst, src, &ctx->ghash_key, dg,
-					  iv, ctx->aes_key.key_enc, nrounds,
+			pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h,
+					  dg, iv, ctx->aes_key.key_enc, nrounds,
 					  tag);
 			kernel_neon_end();
 
@@ -582,7 +574,8 @@ static struct aead_alg gcm_aes_alg = {
 	.base.cra_driver_name	= "gcm-aes-ce",
 	.base.cra_priority	= 300,
 	.base.cra_blocksize	= 1,
-	.base.cra_ctxsize	= sizeof(struct gcm_aes_ctx),
+	.base.cra_ctxsize	= sizeof(struct gcm_aes_ctx) +
+				  4 * sizeof(u64[2]),
 	.base.cra_module	= THIS_MODULE,
 };
 
-- 
2.20.1


* [PATCH 4/5] crypto: arm64/gcm - use inline helper to suppress indirect calls
  2020-06-29  7:39 [PATCH 0/5] crypto: clean up ARM/arm64 glue code for GHASH and GCM Ard Biesheuvel
                   ` (2 preceding siblings ...)
  2020-06-29  7:39 ` [PATCH 3/5] crypto: arm64/gcm - use variably sized key struct Ard Biesheuvel
@ 2020-06-29  7:39 ` Ard Biesheuvel
  2020-06-29  7:39 ` [PATCH 5/5] crypto: arm/ghash - use variably sized key struct Ard Biesheuvel
  4 siblings, 0 replies; 6+ messages in thread
From: Ard Biesheuvel @ 2020-06-29  7:39 UTC (permalink / raw)
  To: linux-crypto; +Cc: herbert, Ard Biesheuvel

Introduce an inline wrapper for ghash_do_update() that incorporates
the indirect call to the asm routine that is passed as an argument,
and keep the non-SIMD fallback code out of line. Since the wrapper is
always inlined into callers that pass the asm routine by name, the
compiler can resolve the function pointer at build time, and no
indirect calls remain.
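
The pattern, reduced to its essence (a sketch, not the exact code in
the hunks below):

	static __always_inline
	void ghash_do_simd_update(int blocks, u64 dg[], const char *src,
				  struct ghash_key *key, const char *head,
				  void (*simd_update)(int blocks, u64 dg[],
						      const char *src,
						      u64 const h[][2],
						      const char *head))
	{
		...
		simd_update(blocks, dg, src, key->h, head);
		...
	}

Every caller names the asm routine directly, e.g.

	ghash_do_simd_update(1, dg, buf, &ctx->ghash_key, NULL,
			     pmull_ghash_update_p64);

so after inlining, each simd_update() invocation collapses into a
direct call to pmull_ghash_update_p8() or pmull_ghash_update_p64().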

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/crypto/ghash-ce-glue.c | 85 +++++++++++---------
 1 file changed, 46 insertions(+), 39 deletions(-)

diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 2ae95dcf648f..da1034867aaa 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -69,36 +69,43 @@ static int ghash_init(struct shash_desc *desc)
 }
 
 static void ghash_do_update(int blocks, u64 dg[], const char *src,
-			    struct ghash_key *key, const char *head,
-			    void (*simd_update)(int blocks, u64 dg[],
-						const char *src,
-						u64 const h[][2],
-						const char *head))
+			    struct ghash_key *key, const char *head)
 {
-	if (likely(crypto_simd_usable() && simd_update)) {
+	be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
+
+	do {
+		const u8 *in = src;
+
+		if (head) {
+			in = head;
+			blocks++;
+			head = NULL;
+		} else {
+			src += GHASH_BLOCK_SIZE;
+		}
+
+		crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
+		gf128mul_lle(&dst, &key->k);
+	} while (--blocks);
+
+	dg[0] = be64_to_cpu(dst.b);
+	dg[1] = be64_to_cpu(dst.a);
+}
+
+static __always_inline
+void ghash_do_simd_update(int blocks, u64 dg[], const char *src,
+			  struct ghash_key *key, const char *head,
+			  void (*simd_update)(int blocks, u64 dg[],
+					      const char *src,
+					      u64 const h[][2],
+					      const char *head))
+{
+	if (likely(crypto_simd_usable())) {
 		kernel_neon_begin();
 		simd_update(blocks, dg, src, key->h, head);
 		kernel_neon_end();
 	} else {
-		be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
-
-		do {
-			const u8 *in = src;
-
-			if (head) {
-				in = head;
-				blocks++;
-				head = NULL;
-			} else {
-				src += GHASH_BLOCK_SIZE;
-			}
-
-			crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
-			gf128mul_lle(&dst, &key->k);
-		} while (--blocks);
-
-		dg[0] = be64_to_cpu(dst.b);
-		dg[1] = be64_to_cpu(dst.a);
+		ghash_do_update(blocks, dg, src, key, head);
 	}
 }
 
@@ -131,9 +138,9 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
 		do {
 			int chunk = min(blocks, MAX_BLOCKS);
 
-			ghash_do_update(chunk, ctx->digest, src, key,
-					partial ? ctx->buf : NULL,
-					pmull_ghash_update_p8);
+			ghash_do_simd_update(chunk, ctx->digest, src, key,
+					     partial ? ctx->buf : NULL,
+					     pmull_ghash_update_p8);
 
 			blocks -= chunk;
 			src += chunk * GHASH_BLOCK_SIZE;
@@ -155,8 +162,8 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
 
 		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
 
-		ghash_do_update(1, ctx->digest, ctx->buf, key, NULL,
-				pmull_ghash_update_p8);
+		ghash_do_simd_update(1, ctx->digest, ctx->buf, key, NULL,
+				     pmull_ghash_update_p8);
 	}
 	put_unaligned_be64(ctx->digest[1], dst);
 	put_unaligned_be64(ctx->digest[0], dst + 8);
@@ -280,9 +287,9 @@ static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
 	if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
 		int blocks = count / GHASH_BLOCK_SIZE;
 
-		ghash_do_update(blocks, dg, src, &ctx->ghash_key,
-				*buf_count ? buf : NULL,
-				pmull_ghash_update_p64);
+		ghash_do_simd_update(blocks, dg, src, &ctx->ghash_key,
+				     *buf_count ? buf : NULL,
+				     pmull_ghash_update_p64);
 
 		src += blocks * GHASH_BLOCK_SIZE;
 		count %= GHASH_BLOCK_SIZE;
@@ -326,8 +333,8 @@ static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
 
 	if (buf_count) {
 		memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
-		ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL,
-				pmull_ghash_update_p64);
+		ghash_do_simd_update(1, dg, buf, &ctx->ghash_key, NULL,
+				     pmull_ghash_update_p64);
 	}
 }
 
@@ -403,7 +410,7 @@ static int gcm_encrypt(struct aead_request *req)
 			} while (--remaining > 0);
 
 			ghash_do_update(blocks, dg, walk.dst.virt.addr,
-					&ctx->ghash_key, NULL, NULL);
+					&ctx->ghash_key, NULL);
 
 			err = skcipher_walk_done(&walk,
 						 walk.nbytes % AES_BLOCK_SIZE);
@@ -422,7 +429,7 @@ static int gcm_encrypt(struct aead_request *req)
 
 		tag = (u8 *)&lengths;
 		ghash_do_update(1, dg, tag, &ctx->ghash_key,
-				walk.nbytes ? buf : NULL, NULL);
+				walk.nbytes ? buf : NULL);
 
 		if (walk.nbytes)
 			err = skcipher_walk_done(&walk, 0);
@@ -507,7 +514,7 @@ static int gcm_decrypt(struct aead_request *req)
 			u8 *dst = walk.dst.virt.addr;
 
 			ghash_do_update(blocks, dg, walk.src.virt.addr,
-					&ctx->ghash_key, NULL, NULL);
+					&ctx->ghash_key, NULL);
 
 			do {
 				aes_encrypt(&ctx->aes_key, buf, iv);
@@ -530,7 +537,7 @@ static int gcm_decrypt(struct aead_request *req)
 
 		tag = (u8 *)&lengths;
 		ghash_do_update(1, dg, tag, &ctx->ghash_key,
-				walk.nbytes ? buf : NULL, NULL);
+				walk.nbytes ? buf : NULL);
 
 		if (walk.nbytes) {
 			aes_encrypt(&ctx->aes_key, buf, iv);
-- 
2.20.1


* [PATCH 5/5] crypto: arm/ghash - use variably sized key struct
  2020-06-29  7:39 [PATCH 0/5] crypto: clean up ARM/arm64 glue code for GHASH and GCM Ard Biesheuvel
                   ` (3 preceding siblings ...)
  2020-06-29  7:39 ` [PATCH 4/5] crypto: arm64/gcm - use inline helper to suppress indirect calls Ard Biesheuvel
@ 2020-06-29  7:39 ` Ard Biesheuvel
  4 siblings, 0 replies; 6+ messages in thread
From: Ard Biesheuvel @ 2020-06-29  7:39 UTC (permalink / raw)
  To: linux-crypto; +Cc: herbert, Ard Biesheuvel

Of the two versions of GHASH that the ARM driver implements, only one
performs aggregation, and so the other has no need for the powers of H
to be precomputed, or for space to be allocated for them in the key
struct. So make the context size depend on which version is being
selected and, while at it, use a static key to carry this decision,
getting rid of the function pointer.
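
The jump label pattern, condensed from the hunks below: the decision is
made once at module init and patched into the instruction stream, so no
function pointer ever needs to be loaded:

	static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_p64);

	/* at module init */
	if (elf_hwcap2 & HWCAP2_PMULL)
		static_branch_enable(&use_p64);

	/* at call time */
	if (static_branch_likely(&use_p64))
		pmull_ghash_update_p64(blocks, dg, src, key->h, head);
	else
		pmull_ghash_update_p8(blocks, dg, src, key->h, head);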

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/crypto/ghash-ce-glue.c | 51 +++++++++-----------
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index a00fd329255f..f13401f3e669 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -16,6 +16,7 @@
 #include <crypto/gf128mul.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
+#include <linux/jump_label.h>
 #include <linux/module.h>
 
 MODULE_DESCRIPTION("GHASH hash function using ARMv8 Crypto Extensions");
@@ -27,12 +28,8 @@ MODULE_ALIAS_CRYPTO("ghash");
 #define GHASH_DIGEST_SIZE	16
 
 struct ghash_key {
-	u64	h[2];
-	u64	h2[2];
-	u64	h3[2];
-	u64	h4[2];
-
 	be128	k;
+	u64	h[][2];
 };
 
 struct ghash_desc_ctx {
@@ -46,16 +43,12 @@ struct ghash_async_ctx {
 };
 
 asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
-				       struct ghash_key const *k,
-				       const char *head);
+				       u64 const h[][2], const char *head);
 
 asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
-				      struct ghash_key const *k,
-				      const char *head);
+				      u64 const h[][2], const char *head);
 
-static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
-				  struct ghash_key const *k,
-				  const char *head);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_p64);
 
 static int ghash_init(struct shash_desc *desc)
 {
@@ -70,7 +63,10 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src,
 {
 	if (likely(crypto_simd_usable())) {
 		kernel_neon_begin();
-		pmull_ghash_update(blocks, dg, src, key, head);
+		if (static_branch_likely(&use_p64))
+			pmull_ghash_update_p64(blocks, dg, src, key->h, head);
+		else
+			pmull_ghash_update_p8(blocks, dg, src, key->h, head);
 		kernel_neon_end();
 	} else {
 		be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
@@ -161,25 +157,26 @@ static int ghash_setkey(struct crypto_shash *tfm,
 			const u8 *inkey, unsigned int keylen)
 {
 	struct ghash_key *key = crypto_shash_ctx(tfm);
-	be128 h;
 
 	if (keylen != GHASH_BLOCK_SIZE)
 		return -EINVAL;
 
 	/* needed for the fallback */
 	memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
-	ghash_reflect(key->h, &key->k);
+	ghash_reflect(key->h[0], &key->k);
 
-	h = key->k;
-	gf128mul_lle(&h, &key->k);
-	ghash_reflect(key->h2, &h);
+	if (static_branch_likely(&use_p64)) {
+		be128 h = key->k;
 
-	gf128mul_lle(&h, &key->k);
-	ghash_reflect(key->h3, &h);
+		gf128mul_lle(&h, &key->k);
+		ghash_reflect(key->h[1], &h);
 
-	gf128mul_lle(&h, &key->k);
-	ghash_reflect(key->h4, &h);
+		gf128mul_lle(&h, &key->k);
+		ghash_reflect(key->h[2], &h);
 
+		gf128mul_lle(&h, &key->k);
+		ghash_reflect(key->h[3], &h);
+	}
 	return 0;
 }
 
@@ -195,7 +192,7 @@ static struct shash_alg ghash_alg = {
 	.base.cra_driver_name	= "ghash-ce-sync",
 	.base.cra_priority	= 300 - 1,
 	.base.cra_blocksize	= GHASH_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct ghash_key),
+	.base.cra_ctxsize	= sizeof(struct ghash_key) + sizeof(u64[2]),
 	.base.cra_module	= THIS_MODULE,
 };
 
@@ -354,10 +351,10 @@ static int __init ghash_ce_mod_init(void)
 	if (!(elf_hwcap & HWCAP_NEON))
 		return -ENODEV;
 
-	if (elf_hwcap2 & HWCAP2_PMULL)
-		pmull_ghash_update = pmull_ghash_update_p64;
-	else
-		pmull_ghash_update = pmull_ghash_update_p8;
+	if (elf_hwcap2 & HWCAP2_PMULL) {
+		ghash_alg.base.cra_ctxsize += 3 * sizeof(u64[2]);
+		static_branch_enable(&use_p64);
+	}
 
 	err = crypto_register_shash(&ghash_alg);
 	if (err)
-- 
2.20.1

